misp_analyzer.py aktualisiert
This commit is contained in:
		
							parent
							
								
									a26e2fb0ad
								
							
						
					
					
						commit
						7404c0ee8d
					
				
							
								
								
									
										489
									
								
								misp_analyzer.py
									
									
									
									
									
								
							
							
						
						
									
										489
									
								
								misp_analyzer.py
									
									
									
									
									
								
							@@ -1,9 +1,12 @@
 | 
				
			|||||||
"""Index analyzer plugin for MISP."""
 | 
					"""Index analyzer plugin for MISP - Enhanced for large-scale processing."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import ntpath
 | 
					import ntpath
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					from collections import defaultdict
 | 
				
			||||||
 | 
					from typing import List, Dict, Set, Any
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from flask import current_app
 | 
					from flask import current_app
 | 
				
			||||||
from timesketch.lib.analyzers import interface
 | 
					from timesketch.lib.analyzers import interface
 | 
				
			||||||
@@ -14,11 +17,11 @@ logger = logging.getLogger("timesketch.analyzers.misp")
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class MispAnalyzer(interface.BaseAnalyzer):
 | 
					class MispAnalyzer(interface.BaseAnalyzer):
 | 
				
			||||||
    """Analyzer for MISP."""
 | 
					    """Enhanced Analyzer for MISP with large-scale processing capabilities."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    NAME = "misp_analyzer"
 | 
					    NAME = "misp_analyzer"
 | 
				
			||||||
    DISPLAY_NAME = "MISP"
 | 
					    DISPLAY_NAME = "MISP Enhanced"
 | 
				
			||||||
    DESCRIPTION = "Mark events using MISP"
 | 
					    DESCRIPTION = "Mark events using MISP with cross-org and large-scale support"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, index_name, sketch_id, timeline_id=None, **kwargs):
 | 
					    def __init__(self, index_name, sketch_id, timeline_id=None, **kwargs):
 | 
				
			||||||
        """Initialize the Analyzer."""
 | 
					        """Initialize the Analyzer."""
 | 
				
			||||||
@@ -30,285 +33,431 @@ class MispAnalyzer(interface.BaseAnalyzer):
 | 
				
			|||||||
        self._query_string = kwargs.get("query_string")
 | 
					        self._query_string = kwargs.get("query_string")
 | 
				
			||||||
        self._attr = kwargs.get("attr")
 | 
					        self._attr = kwargs.get("attr")
 | 
				
			||||||
        self._timesketch_attr = kwargs.get("timesketch_attr")
 | 
					        self._timesketch_attr = kwargs.get("timesketch_attr")
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Enhanced configuration
 | 
				
			||||||
 | 
					        self.include_community = kwargs.get("include_community", True)
 | 
				
			||||||
 | 
					        self.batch_size = kwargs.get("batch_size", 100)  # Process events in batches
 | 
				
			||||||
 | 
					        self.api_batch_size = kwargs.get("api_batch_size", 50)  # API call batching
 | 
				
			||||||
 | 
					        self.max_retries = kwargs.get("max_retries", 3)
 | 
				
			||||||
 | 
					        self.request_timeout = kwargs.get("request_timeout", 120)  # 2 minutes
 | 
				
			||||||
 | 
					        self.chunk_size = kwargs.get("chunk_size", 1000)  # Memory management
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Regex patterns
 | 
				
			||||||
        self.ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')
 | 
					        self.ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')
 | 
				
			||||||
        # Track marked events to prevent duplicates
 | 
					        
 | 
				
			||||||
 | 
					        # Track processed items to prevent duplicates
 | 
				
			||||||
        self.marked_events = set()
 | 
					        self.marked_events = set()
 | 
				
			||||||
 | 
					        self.processed_indicators = set()
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Statistics
 | 
				
			||||||
 | 
					        self.stats = {
 | 
				
			||||||
 | 
					            'events_processed': 0,
 | 
				
			||||||
 | 
					            'indicators_extracted': 0,
 | 
				
			||||||
 | 
					            'api_calls_made': 0,
 | 
				
			||||||
 | 
					            'events_marked': 0,
 | 
				
			||||||
 | 
					            'errors': 0
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def get_kwargs():
 | 
					    def get_kwargs():
 | 
				
			||||||
        """Get kwargs for the analyzer."""
 | 
					        """Get kwargs for the analyzer with enhanced options."""
 | 
				
			||||||
        to_query = [
 | 
					        to_query = [
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                "query_string": "md5_hash:*",
 | 
					                "query_string": "md5_hash:*",
 | 
				
			||||||
                "attr": "md5",
 | 
					                "attr": "md5",
 | 
				
			||||||
                "timesketch_attr": "md5_hash",
 | 
					                "timesketch_attr": "md5_hash",
 | 
				
			||||||
 | 
					                "include_community": True,
 | 
				
			||||||
 | 
					                "batch_size": 100,
 | 
				
			||||||
 | 
					                "api_batch_size": 50,
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                "query_string": "sha1_hash:*",
 | 
					                "query_string": "sha1_hash:*",
 | 
				
			||||||
                "attr": "sha1",
 | 
					                "attr": "sha1",
 | 
				
			||||||
                "timesketch_attr": "sha1_hash",
 | 
					                "timesketch_attr": "sha1_hash",
 | 
				
			||||||
 | 
					                "include_community": True,
 | 
				
			||||||
 | 
					                "batch_size": 100,
 | 
				
			||||||
 | 
					                "api_batch_size": 50,
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                "query_string": "sha256_hash:*",
 | 
					                "query_string": "sha256_hash:*",
 | 
				
			||||||
                "attr": "sha256",
 | 
					                "attr": "sha256",
 | 
				
			||||||
                "timesketch_attr": "sha256_hash",
 | 
					                "timesketch_attr": "sha256_hash",
 | 
				
			||||||
 | 
					                "include_community": True,
 | 
				
			||||||
 | 
					                "batch_size": 100,
 | 
				
			||||||
 | 
					                "api_batch_size": 50,
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                "query_string": "filename:*",
 | 
					                "query_string": "filename:*",
 | 
				
			||||||
                "attr": "filename",
 | 
					                "attr": "filename",
 | 
				
			||||||
                "timesketch_attr": "filename",
 | 
					                "timesketch_attr": "filename",
 | 
				
			||||||
 | 
					                "include_community": True,
 | 
				
			||||||
 | 
					                "batch_size": 100,
 | 
				
			||||||
 | 
					                "api_batch_size": 50,
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                "query_string": "message:*",
 | 
					                "query_string": "message:*",
 | 
				
			||||||
                "attr": "ip",  # Generic IP instead of ip-src/ip-dst
 | 
					                "attr": "ip",
 | 
				
			||||||
                "timesketch_attr": "message",
 | 
					                "timesketch_attr": "message",
 | 
				
			||||||
 | 
					                "include_community": True,
 | 
				
			||||||
 | 
					                "batch_size": 100,
 | 
				
			||||||
 | 
					                "api_batch_size": 50,
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                "query_string": "source_ip:*",
 | 
					                "query_string": "source_ip:* OR src_ip:* OR client_ip:*",
 | 
				
			||||||
                "attr": "ip",
 | 
					                "attr": "ip",
 | 
				
			||||||
                "timesketch_attr": "source_ip",
 | 
					                "timesketch_attr": "source_ip",
 | 
				
			||||||
 | 
					                "include_community": True,
 | 
				
			||||||
 | 
					                "batch_size": 100,
 | 
				
			||||||
 | 
					                "api_batch_size": 50,
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
        ]
 | 
					        ]
 | 
				
			||||||
        return to_query
 | 
					        return to_query
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _is_valid_ip(self, ip_str):
 | 
					    def _is_valid_ip(self, ip_str: str) -> bool:
 | 
				
			||||||
        """Validate IP address."""
 | 
					        """Validate IP address with enhanced filtering."""
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            import ipaddress
 | 
					            import ipaddress
 | 
				
			||||||
            ip_str = ip_str.strip()
 | 
					            ip_str = ip_str.strip()
 | 
				
			||||||
            ipaddress.ip_address(ip_str)
 | 
					            ip_obj = ipaddress.ip_address(ip_str)
 | 
				
			||||||
            if ip_str.startswith(('0.', '127.', '255.255.255.255')):
 | 
					            
 | 
				
			||||||
 | 
					            # Filter out private, loopback, and other non-routable IPs
 | 
				
			||||||
 | 
					            if (ip_obj.is_private or ip_obj.is_loopback or 
 | 
				
			||||||
 | 
					                ip_obj.is_multicast or ip_obj.is_reserved or
 | 
				
			||||||
 | 
					                ip_obj.is_link_local):
 | 
				
			||||||
                return False
 | 
					                return False
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					            # Additional nginx log specific filters
 | 
				
			||||||
 | 
					            if ip_str.startswith(('0.', '255.255.255.255', '169.254.')):
 | 
				
			||||||
 | 
					                return False
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
            return True
 | 
					            return True
 | 
				
			||||||
        except (ValueError, AttributeError):
 | 
					        except (ValueError, AttributeError):
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _is_valid_hash(self, hash_str, hash_type):
 | 
					    def _is_valid_hash(self, hash_str: str, hash_type: str) -> bool:
 | 
				
			||||||
        """Validate hash format."""
 | 
					        """Validate hash format."""
 | 
				
			||||||
        if not hash_str:
 | 
					        if not hash_str:
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
        hash_str = hash_str.strip().lower()
 | 
					        hash_str = hash_str.strip().lower()
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        if hash_type == "md5":
 | 
					        hash_lengths = {"md5": 32, "sha1": 40, "sha256": 64}
 | 
				
			||||||
            return len(hash_str) == 32 and all(c in '0123456789abcdef' for c in hash_str)
 | 
					        expected_length = hash_lengths.get(hash_type)
 | 
				
			||||||
        elif hash_type == "sha1":
 | 
					 | 
				
			||||||
            return len(hash_str) == 40 and all(c in '0123456789abcdef' for c in hash_str)
 | 
					 | 
				
			||||||
        elif hash_type == "sha256":
 | 
					 | 
				
			||||||
            return len(hash_str) == 64 and all(c in '0123456789abcdef' for c in hash_str)
 | 
					 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					        if not expected_length:
 | 
				
			||||||
            return False
 | 
					            return False
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
    def query_misp_single(self, value, attr):
 | 
					        return (len(hash_str) == expected_length and 
 | 
				
			||||||
        """Query MISP for a single value - ENHANCED for cross-org visibility."""
 | 
					                all(c in '0123456789abcdef' for c in hash_str))
 | 
				
			||||||
        results = []
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Query both ip-src and ip-dst for IPs, include cross-org events
 | 
					    def _make_misp_request(self, payload: Dict[str, Any], retry_count: int = 0) -> List[Dict]:
 | 
				
			||||||
 | 
					        """Make MISP API request with retry logic."""
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            response = requests.post(
 | 
				
			||||||
 | 
					                f"{self.misp_url}/attributes/restSearch/",
 | 
				
			||||||
 | 
					                json=payload,
 | 
				
			||||||
 | 
					                headers={"Authorization": self.misp_api_key},
 | 
				
			||||||
 | 
					                verify=False,
 | 
				
			||||||
 | 
					                timeout=self.request_timeout,
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            if response.status_code == 200:
 | 
				
			||||||
 | 
					                data = response.json()
 | 
				
			||||||
 | 
					                return data.get("response", {}).get("Attribute", [])
 | 
				
			||||||
 | 
					            elif response.status_code == 429:  # Rate limited
 | 
				
			||||||
 | 
					                wait_time = min(2 ** retry_count, 60)  # Exponential backoff, max 60s
 | 
				
			||||||
 | 
					                logger.warning(f"Rate limited, waiting {wait_time}s before retry")
 | 
				
			||||||
 | 
					                time.sleep(wait_time)
 | 
				
			||||||
 | 
					                raise requests.exceptions.RequestException("Rate limited")
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                logger.warning(f"MISP API returned status {response.status_code}")
 | 
				
			||||||
 | 
					                return []
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
 | 
				
			||||||
 | 
					            if retry_count < self.max_retries:
 | 
				
			||||||
 | 
					                wait_time = min(2 ** retry_count * 5, 120)  # Exponential backoff
 | 
				
			||||||
 | 
					                logger.warning(f"Request failed (attempt {retry_count + 1}), retrying in {wait_time}s: {e}")
 | 
				
			||||||
 | 
					                time.sleep(wait_time)
 | 
				
			||||||
 | 
					                return self._make_misp_request(payload, retry_count + 1)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                logger.error(f"Request failed after {self.max_retries} retries: {e}")
 | 
				
			||||||
 | 
					                self.stats['errors'] += 1
 | 
				
			||||||
 | 
					                return []
 | 
				
			||||||
 | 
					        except Exception as e:
 | 
				
			||||||
 | 
					            logger.error(f"Unexpected error in MISP request: {e}")
 | 
				
			||||||
 | 
					            self.stats['errors'] += 1
 | 
				
			||||||
 | 
					            return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def query_misp_batch(self, indicators: List[str], attr: str) -> Dict[str, List[Dict]]:
 | 
				
			||||||
 | 
					        """Query MISP for multiple indicators efficiently."""
 | 
				
			||||||
 | 
					        results = defaultdict(list)
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Determine search types based on attribute
 | 
				
			||||||
        if attr == "ip":
 | 
					        if attr == "ip":
 | 
				
			||||||
            search_types = ["ip-src", "ip-dst"]
 | 
					            search_types = ["ip-src", "ip-dst"]
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            search_types = [attr]
 | 
					            search_types = [attr]
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        for search_type in search_types:
 | 
					        for search_type in search_types:
 | 
				
			||||||
            try:
 | 
					            # Batch indicators to reduce API calls
 | 
				
			||||||
                # Include events from other organizations
 | 
					            for i in range(0, len(indicators), self.api_batch_size):
 | 
				
			||||||
 | 
					                batch = indicators[i:i + self.api_batch_size]
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                # Build payload with distribution settings
 | 
				
			||||||
 | 
					                distribution_levels = [0]  # Own org
 | 
				
			||||||
 | 
					                if self.include_community:
 | 
				
			||||||
 | 
					                    distribution_levels.extend([1, 2])  # Community and connected orgs
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
                payload = {
 | 
					                payload = {
 | 
				
			||||||
                    "returnFormat": "json",
 | 
					                    "returnFormat": "json",
 | 
				
			||||||
                    "value": value, 
 | 
					                    "value": batch,
 | 
				
			||||||
                    "type": search_type,
 | 
					                    "type": search_type,
 | 
				
			||||||
                    # Include events from all organizations with proper distribution
 | 
					                    "enforceWarninglist": False,
 | 
				
			||||||
                    "enforceWarninglist": False,  # Don't filter known-good IPs
 | 
					                    "includeDecayScore": False,
 | 
				
			||||||
                    "includeDecayScore": False,   # Skip decay scores for speed
 | 
					                    "includeFullModel": False,
 | 
				
			||||||
                    "includeFullModel": False,    # Skip full model for speed
 | 
					                    "excludeDecayed": False,
 | 
				
			||||||
                    "decayingModel": [],          # No decaying model filters
 | 
					                    "distribution": distribution_levels,
 | 
				
			||||||
                    "excludeDecayed": False,      # Include older indicators
 | 
					                    "limit": 10000,  # High limit for large datasets
 | 
				
			||||||
                    # Distribution levels: 0=Own org, 1=Community, 2=Connected, 3=All, 5=Inherit
 | 
					 | 
				
			||||||
                    "distribution": [0, 1]  
 | 
					 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                response = requests.post(
 | 
					                self.stats['api_calls_made'] += 1
 | 
				
			||||||
                    f"{self.misp_url}/attributes/restSearch/",
 | 
					                logger.info(f"Querying MISP for {len(batch)} {search_type} indicators (call #{self.stats['api_calls_made']})")
 | 
				
			||||||
                    json=payload,
 | 
					 | 
				
			||||||
                    headers={"Authorization": self.misp_api_key},
 | 
					 | 
				
			||||||
                    verify=False,
 | 
					 | 
				
			||||||
                    timeout=30,
 | 
					 | 
				
			||||||
                )
 | 
					 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                if response.status_code == 200:
 | 
					                batch_results = self._make_misp_request(payload)
 | 
				
			||||||
                    data = response.json()
 | 
					 | 
				
			||||||
                    attributes = data.get("response", {}).get("Attribute", [])
 | 
					 | 
				
			||||||
                    results.extend(attributes)
 | 
					 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
            except Exception:
 | 
					                # Group results by indicator value
 | 
				
			||||||
                continue
 | 
					                for result in batch_results:
 | 
				
			||||||
 | 
					                    indicator_value = result.get("value", "").strip()
 | 
				
			||||||
 | 
					                    if indicator_value in batch:
 | 
				
			||||||
 | 
					                        results[indicator_value].append(result)
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
        return results
 | 
					                # Rate limiting courtesy pause
 | 
				
			||||||
 | 
					                time.sleep(0.5)
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
    def mark_event(self, event, result, attr):
 | 
					        return dict(results)
 | 
				
			||||||
        """Add MISP intelligence to event - FIXED to prevent duplicates."""
 | 
					
 | 
				
			||||||
 | 
					    def extract_indicators_from_event(self, event: Any, attr: str, timesketch_attr: str) -> List[str]:
 | 
				
			||||||
 | 
					        """Extract indicators from a single event."""
 | 
				
			||||||
 | 
					        loc = event.source.get(timesketch_attr)
 | 
				
			||||||
 | 
					        if not loc:
 | 
				
			||||||
 | 
					            return []
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        indicators = []
 | 
				
			||||||
 | 
					        loc_str = str(loc)
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        if attr == "ip" and timesketch_attr == "message":
 | 
				
			||||||
 | 
					            # Extract IPs from nginx access log messages
 | 
				
			||||||
 | 
					            ip_matches = self.ip_pattern.findall(loc_str)
 | 
				
			||||||
 | 
					            indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					        elif attr == "ip" and timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
 | 
				
			||||||
 | 
					            if self._is_valid_ip(loc_str):
 | 
				
			||||||
 | 
					                indicators = [loc_str]
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					        elif attr in ["md5", "sha1", "sha256"]:
 | 
				
			||||||
 | 
					            if self._is_valid_hash(loc_str, attr):
 | 
				
			||||||
 | 
					                indicators = [loc_str]
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					        elif attr == "filename":
 | 
				
			||||||
 | 
					            filename = ntpath.basename(loc_str)
 | 
				
			||||||
 | 
					            if filename and len(filename) > 3:  # Meaningful filename
 | 
				
			||||||
 | 
					                indicators = [filename]
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        return indicators
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def mark_event_with_intel(self, event: Any, misp_results: List[Dict], attr: str) -> None:
 | 
				
			||||||
 | 
					        """Mark event with MISP intelligence."""
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            # Check if event already marked
 | 
					 | 
				
			||||||
            event_id = event.source.get('_id', '')
 | 
					            event_id = event.source.get('_id', '')
 | 
				
			||||||
            if event_id in self.marked_events:
 | 
					            if event_id in self.marked_events:
 | 
				
			||||||
                return
 | 
					                return
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            self.marked_events.add(event_id)
 | 
					            self.marked_events.add(event_id)
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            # Show organization info for cross-org awareness
 | 
					            # Build comprehensive message
 | 
				
			||||||
            if attr == "ip":
 | 
					            if attr == "ip":
 | 
				
			||||||
                msg = "MISP: Malicious IP detected - "
 | 
					                msg = "MISP: Malicious IP detected"
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                msg = "MISP: Known indicator - "
 | 
					                msg = f"MISP: Known {attr.upper()} indicator"
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            # Collect unique events and organizations
 | 
					            # Collect event and organization info
 | 
				
			||||||
            events_info = {}
 | 
					            events_info = {}
 | 
				
			||||||
            orgs_info = set()
 | 
					            orgs_info = set()
 | 
				
			||||||
 | 
					            threat_levels = set()
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            for misp_attr in result:
 | 
					            for misp_attr in misp_results:
 | 
				
			||||||
                event_info = misp_attr.get("Event", {})
 | 
					                event_info = misp_attr.get("Event", {})
 | 
				
			||||||
                event_id = event_info.get("id", "")
 | 
					                event_id_misp = event_info.get("id", "")
 | 
				
			||||||
                event_desc = event_info.get("info", "Unknown")
 | 
					                event_desc = event_info.get("info", "Unknown")
 | 
				
			||||||
                org_name = event_info.get("Orgc", {}).get("name", "Unknown Org")
 | 
					                org_name = event_info.get("Orgc", {}).get("name", "Unknown")
 | 
				
			||||||
 | 
					                threat_level = event_info.get("threat_level_id", "")
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                events_info[event_id] = f'"{event_desc}"'
 | 
					                events_info[event_id_misp] = event_desc[:50]  # Truncate long descriptions
 | 
				
			||||||
                orgs_info.add(org_name)
 | 
					                orgs_info.add(org_name)
 | 
				
			||||||
 | 
					                if threat_level:
 | 
				
			||||||
 | 
					                    threat_levels.add(threat_level)
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            # Build message with org info
 | 
					            # Enhanced message with threat context
 | 
				
			||||||
            event_descriptions = list(events_info.values())[:2]  # First 2 events
 | 
					            event_descriptions = list(events_info.values())[:2]
 | 
				
			||||||
            msg += " | ".join(event_descriptions)
 | 
					            if event_descriptions:
 | 
				
			||||||
 | 
					                msg += f" | Events: {' | '.join(event_descriptions)}"
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            if len(result) > 2:
 | 
					            if len(misp_results) > 2:
 | 
				
			||||||
                msg += f" | +{len(result)-2} more"
 | 
					                msg += f" | +{len(misp_results)-2} more indicators"
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            # Add organization information
 | 
					            # Organization information
 | 
				
			||||||
            if len(orgs_info) > 1:
 | 
					            if len(orgs_info) > 1:
 | 
				
			||||||
                msg += f" | Orgs: {', '.join(list(orgs_info)[:3])}"
 | 
					                msg += f" | Sources: {', '.join(list(orgs_info)[:3])}"
 | 
				
			||||||
            elif orgs_info:
 | 
					            elif orgs_info and list(orgs_info)[0] != "Unknown":
 | 
				
			||||||
                org_name = list(orgs_info)[0]
 | 
					                msg += f" | Source: {list(orgs_info)[0]}"
 | 
				
			||||||
                if org_name != "Unknown Org":
 | 
					            
 | 
				
			||||||
                    msg += f" | Org: {org_name}"
 | 
					            # Threat level context
 | 
				
			||||||
 | 
					            if threat_levels:
 | 
				
			||||||
 | 
					                highest_threat = min(threat_levels)  # Lower number = higher threat
 | 
				
			||||||
 | 
					                threat_map = {"1": "HIGH", "2": "MEDIUM", "3": "LOW", "4": "UNDEFINED"}
 | 
				
			||||||
 | 
					                msg += f" | Threat: {threat_map.get(str(highest_threat), 'UNKNOWN')}"
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            # Add tags and comment
 | 
				
			||||||
 | 
					            tags = [f"MISP-{attr}", "threat-intel"]
 | 
				
			||||||
 | 
					            if self.include_community and len(orgs_info) > 1:
 | 
				
			||||||
 | 
					                tags.append("cross-org-intel")
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            event.add_comment(msg)
 | 
					            event.add_comment(msg)
 | 
				
			||||||
            event.add_tags([f"MISP-{attr}", "threat-intel", "cross-org-intel"])
 | 
					            event.add_tags(tags)
 | 
				
			||||||
            event.commit()
 | 
					            event.commit()
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
 | 
					            self.stats['events_marked'] += 1
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
        except Exception as e:
 | 
					        except Exception as e:
 | 
				
			||||||
            logger.error(f"Error marking event: {e}")
 | 
					            logger.error(f"Error marking event {event_id}: {e}")
 | 
				
			||||||
 | 
					            self.stats['errors'] += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def query_misp(self, query, attr, timesketch_attr):
 | 
					    def process_events_chunk(self, events_chunk: List[Any], attr: str, timesketch_attr: str) -> None:
 | 
				
			||||||
        """Extract indicators and query MISP."""
 | 
					        """Process a chunk of events efficiently."""
 | 
				
			||||||
        events = self.event_stream(query_string=query, return_fields=[timesketch_attr, '_id'])
 | 
					        # Extract all indicators from the chunk
 | 
				
			||||||
        query_list = []
 | 
					        chunk_indicators = []
 | 
				
			||||||
        events_list = []
 | 
					        event_to_indicators = {}
 | 
				
			||||||
        processed = 0
 | 
					 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        # Extract indicators from events
 | 
					        for event in events_chunk:
 | 
				
			||||||
        for event in events:
 | 
					            indicators = self.extract_indicators_from_event(event, attr, timesketch_attr)
 | 
				
			||||||
            processed += 1
 | 
					            if indicators:
 | 
				
			||||||
            if processed > 5000:
 | 
					                event_id = event.source.get('_id', '')
 | 
				
			||||||
                break
 | 
					                event_to_indicators[event_id] = (event, indicators)
 | 
				
			||||||
 | 
					                chunk_indicators.extend(indicators)
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
            loc = event.source.get(timesketch_attr)
 | 
					        # Remove duplicates while preserving order
 | 
				
			||||||
            if not loc:
 | 
					        unique_indicators = list(dict.fromkeys(chunk_indicators))
 | 
				
			||||||
                continue
 | 
					        new_indicators = [ind for ind in unique_indicators if ind not in self.processed_indicators]
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
            events_list.append(event)
 | 
					        if not new_indicators:
 | 
				
			||||||
            indicators = []
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            # Extract based on attribute type
 | 
					 | 
				
			||||||
            if attr == "ip" and timesketch_attr == "message":
 | 
					 | 
				
			||||||
                ip_matches = self.ip_pattern.findall(str(loc))
 | 
					 | 
				
			||||||
                indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
 | 
					 | 
				
			||||||
                
 | 
					 | 
				
			||||||
            elif attr == "ip" and timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
 | 
					 | 
				
			||||||
                if self._is_valid_ip(str(loc)):
 | 
					 | 
				
			||||||
                    indicators = [str(loc)]
 | 
					 | 
				
			||||||
                    
 | 
					 | 
				
			||||||
            elif attr in ["md5", "sha1", "sha256"]:
 | 
					 | 
				
			||||||
                if self._is_valid_hash(str(loc), attr):
 | 
					 | 
				
			||||||
                    indicators = [str(loc)]
 | 
					 | 
				
			||||||
                    
 | 
					 | 
				
			||||||
            elif attr == "filename":
 | 
					 | 
				
			||||||
                filename = ntpath.basename(str(loc))
 | 
					 | 
				
			||||||
                if filename and len(filename) > 1:
 | 
					 | 
				
			||||||
                    indicators = [filename]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            # Add valid indicators to query list
 | 
					 | 
				
			||||||
            for indicator in indicators:
 | 
					 | 
				
			||||||
                if indicator not in query_list:
 | 
					 | 
				
			||||||
                    query_list.append(indicator)
 | 
					 | 
				
			||||||
                    self.result_dict[f"{attr}:{indicator}"] = []
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        logger.info(f"Extracted {len(query_list)} {attr} indicators from {processed} events")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if not query_list:
 | 
					 | 
				
			||||||
            return
 | 
					            return
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        # Query MISP for each indicator
 | 
					        logger.info(f"Processing {len(new_indicators)} new indicators from chunk of {len(events_chunk)} events")
 | 
				
			||||||
        for indicator in query_list:
 | 
					 | 
				
			||||||
            result = self.query_misp_single(indicator, attr)
 | 
					 | 
				
			||||||
            if result:
 | 
					 | 
				
			||||||
                self.result_dict[f"{attr}:{indicator}"] = result
 | 
					 | 
				
			||||||
                # Log organization diversity
 | 
					 | 
				
			||||||
                orgs = set()
 | 
					 | 
				
			||||||
                for r in result:
 | 
					 | 
				
			||||||
                    org = r.get("Event", {}).get("Orgc", {}).get("name", "Unknown")
 | 
					 | 
				
			||||||
                    orgs.add(org)
 | 
					 | 
				
			||||||
                logger.info(f"MISP hit: {indicator} ({len(result)} indicators from {len(orgs)} orgs)")
 | 
					 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        # Mark matching events 
 | 
					        # Query MISP for new indicators
 | 
				
			||||||
        for event in events_list:
 | 
					        misp_results = self.query_misp_batch(new_indicators, attr)
 | 
				
			||||||
            loc = event.source.get(timesketch_attr)
 | 
					 | 
				
			||||||
            if not loc:
 | 
					 | 
				
			||||||
                continue
 | 
					 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
            # Check if event already processed
 | 
					        # Update processed indicators and result cache
 | 
				
			||||||
            event_id = event.source.get('_id', '')
 | 
					        self.processed_indicators.update(new_indicators)
 | 
				
			||||||
 | 
					        for indicator, results in misp_results.items():
 | 
				
			||||||
 | 
					            if results:
 | 
				
			||||||
 | 
					                self.result_dict[f"{attr}:{indicator}"] = results
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Mark events that have matching indicators
 | 
				
			||||||
 | 
					        for event_id, (event, indicators) in event_to_indicators.items():
 | 
				
			||||||
            if event_id in self.marked_events:
 | 
					            if event_id in self.marked_events:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
            # Re-extract indicators from this event
 | 
					            matching_results = []
 | 
				
			||||||
            event_indicators = []
 | 
					            for indicator in indicators:
 | 
				
			||||||
            
 | 
					 | 
				
			||||||
            if attr == "ip" and timesketch_attr == "message":
 | 
					 | 
				
			||||||
                ip_matches = self.ip_pattern.findall(str(loc))
 | 
					 | 
				
			||||||
                event_indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
 | 
					 | 
				
			||||||
            elif attr == "ip" and timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
 | 
					 | 
				
			||||||
                if self._is_valid_ip(str(loc)):
 | 
					 | 
				
			||||||
                    event_indicators = [str(loc)]
 | 
					 | 
				
			||||||
            elif attr in ["md5", "sha1", "sha256"]:
 | 
					 | 
				
			||||||
                if self._is_valid_hash(str(loc), attr):
 | 
					 | 
				
			||||||
                    event_indicators = [str(loc)]
 | 
					 | 
				
			||||||
            elif attr == "filename":
 | 
					 | 
				
			||||||
                filename = ntpath.basename(str(loc))
 | 
					 | 
				
			||||||
                if filename:
 | 
					 | 
				
			||||||
                    event_indicators = [filename]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            # Check if any indicator has MISP match
 | 
					 | 
				
			||||||
            for indicator in event_indicators:
 | 
					 | 
				
			||||||
                key = f"{attr}:{indicator}"
 | 
					                key = f"{attr}:{indicator}"
 | 
				
			||||||
                if key in self.result_dict and self.result_dict[key]:
 | 
					                if key in self.result_dict:
 | 
				
			||||||
                    self.total_event_counter += 1
 | 
					                    matching_results.extend(self.result_dict[key])
 | 
				
			||||||
                    self.mark_event(event, self.result_dict[key], attr)
 | 
					 | 
				
			||||||
                    break  # Only mark once per event
 | 
					 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
        # Create view if we found matches
 | 
					            if matching_results:
 | 
				
			||||||
        if self.total_event_counter > 0:
 | 
					                self.mark_event_with_intel(event, matching_results, attr)
 | 
				
			||||||
            self.sketch.add_view(
 | 
					
 | 
				
			||||||
                view_name="MISP Cross-Org Threat Intel",
 | 
					    def query_misp(self, query: str, attr: str, timesketch_attr: str) -> None:
 | 
				
			||||||
                analyzer_name=self.NAME,
 | 
					        """Main processing function with chunked approach for large datasets."""
 | 
				
			||||||
                query_string='tag:"MISP-*" OR tag:"threat-intel" OR tag:"cross-org-intel"',
 | 
					        logger.info(f"Starting MISP analysis for {attr} in {timesketch_attr}")
 | 
				
			||||||
 | 
					        logger.info(f"Community querying: {'enabled' if self.include_community else 'disabled'}")
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Process events in chunks to manage memory
 | 
				
			||||||
 | 
					        events_stream = self.event_stream(
 | 
				
			||||||
 | 
					            query_string=query, 
 | 
				
			||||||
 | 
					            return_fields=[timesketch_attr, '_id', 'timestamp']
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
    def run(self):
 | 
					        current_chunk = []
 | 
				
			||||||
        """Entry point for the analyzer."""
 | 
					 | 
				
			||||||
        if not self.misp_url or not self.misp_api_key:
 | 
					 | 
				
			||||||
            return "No MISP configuration found"
 | 
					 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            self.query_misp(self._query_string, self._attr, self._timesketch_attr)
 | 
					            for event in events_stream:
 | 
				
			||||||
            return f"[{self._timesketch_attr}] MISP Match: {self.total_event_counter}"
 | 
					                current_chunk.append(event)
 | 
				
			||||||
 | 
					                self.stats['events_processed'] += 1
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                # Process chunk when it reaches the specified size
 | 
				
			||||||
 | 
					                if len(current_chunk) >= self.chunk_size:
 | 
				
			||||||
 | 
					                    self.process_events_chunk(current_chunk, attr, timesketch_attr)
 | 
				
			||||||
 | 
					                    current_chunk = []
 | 
				
			||||||
 | 
					                    
 | 
				
			||||||
 | 
					                    # Progress logging
 | 
				
			||||||
 | 
					                    if self.stats['events_processed'] % 10000 == 0:
 | 
				
			||||||
 | 
					                        logger.info(f"Progress: {self.stats['events_processed']} events processed, "
 | 
				
			||||||
 | 
					                                  f"{self.stats['events_marked']} marked, "
 | 
				
			||||||
 | 
					                                  f"{self.stats['api_calls_made']} API calls made")
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            # Process remaining events in the last chunk
 | 
				
			||||||
 | 
					            if current_chunk:
 | 
				
			||||||
 | 
					                self.process_events_chunk(current_chunk, attr, timesketch_attr)
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
        except Exception as e:
 | 
					        except Exception as e:
 | 
				
			||||||
            logger.error(f"MISP analyzer error: {e}")
 | 
					            logger.error(f"Error during event processing: {e}")
 | 
				
			||||||
 | 
					            self.stats['errors'] += 1
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Create comprehensive view if we found matches
 | 
				
			||||||
 | 
					        if self.stats['events_marked'] > 0:
 | 
				
			||||||
 | 
					            view_name = f"MISP Threat Intel - {attr.upper()}"
 | 
				
			||||||
 | 
					            if self.include_community:
 | 
				
			||||||
 | 
					                view_name += " (Cross-Org)"
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					            self.sketch.add_view(
 | 
				
			||||||
 | 
					                view_name=view_name,
 | 
				
			||||||
 | 
					                analyzer_name=self.NAME,
 | 
				
			||||||
 | 
					                query_string=f'tag:"MISP-{attr}" OR tag:"threat-intel"',
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def run(self) -> str:
 | 
				
			||||||
 | 
					        """Entry point for the analyzer with comprehensive error handling."""
 | 
				
			||||||
 | 
					        if not self.misp_url or not self.misp_api_key:
 | 
				
			||||||
 | 
					            return "Error: No MISP configuration found"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        start_time = time.time()
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            logger.info(f"Starting MISP analyzer with config: "
 | 
				
			||||||
 | 
					                       f"batch_size={self.batch_size}, "
 | 
				
			||||||
 | 
					                       f"api_batch_size={self.api_batch_size}, "
 | 
				
			||||||
 | 
					                       f"chunk_size={self.chunk_size}, "
 | 
				
			||||||
 | 
					                       f"include_community={self.include_community}")
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            self.query_misp(self._query_string, self._attr, self._timesketch_attr)
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            elapsed_time = time.time() - start_time
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            # Comprehensive results summary
 | 
				
			||||||
 | 
					            result_msg = (f"[{self._timesketch_attr}] MISP Analysis Complete: "
 | 
				
			||||||
 | 
					                         f"{self.stats['events_marked']}/{self.stats['events_processed']} events marked | "
 | 
				
			||||||
 | 
					                         f"{self.stats['api_calls_made']} API calls | "
 | 
				
			||||||
 | 
					                         f"{len(self.processed_indicators)} indicators processed | "
 | 
				
			||||||
 | 
					                         f"{elapsed_time:.1f}s")
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            if self.stats['errors'] > 0:
 | 
				
			||||||
 | 
					                result_msg += f" | {self.stats['errors']} errors"
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            logger.info(result_msg)
 | 
				
			||||||
 | 
					            return result_msg
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					        except Exception as e:
 | 
				
			||||||
 | 
					            logger.error(f"MISP analyzer critical error: {e}")
 | 
				
			||||||
            return f"[{self._timesketch_attr}] MISP Error: {str(e)}"
 | 
					            return f"[{self._timesketch_attr}] MISP Error: {str(e)}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user