postgres
This commit is contained in:
		
							parent
							
								
									7472e6f416
								
							
						
					
					
						commit
						8d402ab4b1
					
				@ -2,15 +2,37 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
 | 
					import psycopg2
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
from typing import List, Dict, Any, Set, Optional
 | 
					from typing import List, Dict, Any, Set, Optional
 | 
				
			||||||
from urllib.parse import quote
 | 
					from urllib.parse import quote
 | 
				
			||||||
from datetime import datetime, timezone
 | 
					from datetime import datetime, timezone
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
 | 
					from psycopg2 import pool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .base_provider import BaseProvider
 | 
					from .base_provider import BaseProvider
 | 
				
			||||||
from core.provider_result import ProviderResult
 | 
					from core.provider_result import ProviderResult
 | 
				
			||||||
from utils.helpers import _is_valid_domain
 | 
					from utils.helpers import _is_valid_domain
 | 
				
			||||||
 | 
					from core.logger import get_forensic_logger
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# --- Global Instance for PostgreSQL Connection Pool ---
 | 
				
			||||||
 | 
					# This pool will be created once per worker process and is not part of the
 | 
				
			||||||
 | 
					# CrtShProvider instance, thus avoiding pickling errors.
 | 
				
			||||||
 | 
					db_pool = None
 | 
				
			||||||
 | 
					try:
 | 
				
			||||||
 | 
					    db_pool = psycopg2.pool.SimpleConnectionPool(
 | 
				
			||||||
 | 
					        1, 5,
 | 
				
			||||||
 | 
					        host='crt.sh',
 | 
				
			||||||
 | 
					        port=5432,
 | 
				
			||||||
 | 
					        user='guest',
 | 
				
			||||||
 | 
					        dbname='certwatch',
 | 
				
			||||||
 | 
					        sslmode='prefer',
 | 
				
			||||||
 | 
					        connect_timeout=60
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    # Use a generic logger here as this is at the module level
 | 
				
			||||||
 | 
					    get_forensic_logger().logger.info("crt.sh: Global PostgreSQL connection pool created successfully.")
 | 
				
			||||||
 | 
					except Exception as e:
 | 
				
			||||||
 | 
					    get_forensic_logger().logger.warning(f"crt.sh: Failed to create global DB connection pool: {e}. Will fall back to HTTP API.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class CrtShProvider(BaseProvider):
 | 
					class CrtShProvider(BaseProvider):
 | 
				
			||||||
@ -121,7 +143,7 @@ class CrtShProvider(BaseProvider):
 | 
				
			|||||||
            
 | 
					            
 | 
				
			||||||
            else:  # "stale" or "not_found"
 | 
					            else:  # "stale" or "not_found"
 | 
				
			||||||
                # Query the API for the latest certificates
 | 
					                # Query the API for the latest certificates
 | 
				
			||||||
                new_raw_certs = self._query_crtsh_api(domain)
 | 
					                new_raw_certs = self._query_crtsh(domain)
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                if self._stop_event and self._stop_event.is_set():
 | 
					                if self._stop_event and self._stop_event.is_set():
 | 
				
			||||||
                    return ProviderResult()
 | 
					                    return ProviderResult()
 | 
				
			||||||
@ -152,8 +174,8 @@ class CrtShProvider(BaseProvider):
 | 
				
			|||||||
                # Save the new result and the raw data to the cache
 | 
					                # Save the new result and the raw data to the cache
 | 
				
			||||||
                self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain)
 | 
					                self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        except requests.exceptions.RequestException as e:
 | 
					        except (requests.exceptions.RequestException, psycopg2.Error) as e:
 | 
				
			||||||
            self.logger.logger.error(f"API query failed for {domain}: {e}")
 | 
					            self.logger.logger.error(f"Upstream query failed for {domain}: {e}")
 | 
				
			||||||
            if cache_status != "not_found":
 | 
					            if cache_status != "not_found":
 | 
				
			||||||
                result = self._load_from_cache(cache_file)
 | 
					                result = self._load_from_cache(cache_file)
 | 
				
			||||||
                self.logger.logger.warning(f"Using stale cache for {domain} due to API failure.")
 | 
					                self.logger.logger.warning(f"Using stale cache for {domain} due to API failure.")
 | 
				
			||||||
@ -256,6 +278,58 @@ class CrtShProvider(BaseProvider):
 | 
				
			|||||||
        except Exception as e:
 | 
					        except Exception as e:
 | 
				
			||||||
            self.logger.logger.warning(f"Failed to save cache file for {domain}: {e}")
 | 
					            self.logger.logger.warning(f"Failed to save cache file for {domain}: {e}")
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
 | 
					    def _query_crtsh(self, domain: str) -> List[Dict[str, Any]]:
 | 
				
			||||||
 | 
					        """Query crt.sh, trying the database first and falling back to the API."""
 | 
				
			||||||
 | 
					        global db_pool
 | 
				
			||||||
 | 
					        if db_pool:
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                self.logger.logger.info(f"crt.sh: Attempting DB query for {domain}")
 | 
				
			||||||
 | 
					                return self._query_crtsh_db(domain)
 | 
				
			||||||
 | 
					            except psycopg2.Error as e:
 | 
				
			||||||
 | 
					                self.logger.logger.warning(f"crt.sh: DB query failed for {domain}: {e}. Falling back to HTTP API.")
 | 
				
			||||||
 | 
					                return self._query_crtsh_api(domain)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            self.logger.logger.info(f"crt.sh: No DB connection pool. Using HTTP API for {domain}")
 | 
				
			||||||
 | 
					            return self._query_crtsh_api(domain)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _query_crtsh_db(self, domain: str) -> List[Dict[str, Any]]:
 | 
				
			||||||
 | 
					        """Query crt.sh database for raw certificate data."""
 | 
				
			||||||
 | 
					        global db_pool
 | 
				
			||||||
 | 
					        conn = db_pool.getconn()
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            with conn.cursor() as cursor:
 | 
				
			||||||
 | 
					                query = """
 | 
				
			||||||
 | 
					                SELECT
 | 
				
			||||||
 | 
					                    c.id,
 | 
				
			||||||
 | 
					                    x509_serialnumber(c.certificate) as serial_number,
 | 
				
			||||||
 | 
					                    x509_notbefore(c.certificate) as not_before,
 | 
				
			||||||
 | 
					                    x509_notafter(c.certificate) as not_after,
 | 
				
			||||||
 | 
					                    c.issuer_ca_id,
 | 
				
			||||||
 | 
					                    ca.name as issuer_name,
 | 
				
			||||||
 | 
					                    x509_commonname(c.certificate) as common_name,
 | 
				
			||||||
 | 
					                    identities(c.certificate)::text as name_value
 | 
				
			||||||
 | 
					                FROM certificate c
 | 
				
			||||||
 | 
					                LEFT JOIN ca ON c.issuer_ca_id = ca.id
 | 
				
			||||||
 | 
					                WHERE identities(c.certificate) @@ plainto_tsquery(%s)
 | 
				
			||||||
 | 
					                ORDER BY c.id DESC
 | 
				
			||||||
 | 
					                LIMIT 5000;
 | 
				
			||||||
 | 
					                """
 | 
				
			||||||
 | 
					                cursor.execute(query, (domain,))
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					                results = []
 | 
				
			||||||
 | 
					                columns = [desc[0] for desc in cursor.description]
 | 
				
			||||||
 | 
					                for row in cursor.fetchall():
 | 
				
			||||||
 | 
					                    row_dict = dict(zip(columns, row))
 | 
				
			||||||
 | 
					                    if row_dict.get('not_before'):
 | 
				
			||||||
 | 
					                        row_dict['not_before'] = row_dict['not_before'].isoformat()
 | 
				
			||||||
 | 
					                    if row_dict.get('not_after'):
 | 
				
			||||||
 | 
					                        row_dict['not_after'] = row_dict['not_after'].isoformat()
 | 
				
			||||||
 | 
					                    results.append(row_dict)
 | 
				
			||||||
 | 
					                self.logger.logger.info(f"crt.sh: DB query for {domain} returned {len(results)} records.")
 | 
				
			||||||
 | 
					                return results
 | 
				
			||||||
 | 
					        finally:
 | 
				
			||||||
 | 
					            db_pool.putconn(conn)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]:
 | 
					    def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]:
 | 
				
			||||||
        """Query crt.sh API for raw certificate data."""
 | 
					        """Query crt.sh API for raw certificate data."""
 | 
				
			||||||
        url = f"{self.base_url}?q={quote(domain)}&output=json"
 | 
					        url = f"{self.base_url}?q={quote(domain)}&output=json"
 | 
				
			||||||
@ -468,6 +542,8 @@ class CrtShProvider(BaseProvider):
 | 
				
			|||||||
            raise ValueError("Empty date string")
 | 
					            raise ValueError("Empty date string")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
 | 
					            if isinstance(date_string, datetime):
 | 
				
			||||||
 | 
					                return date_string.replace(tzinfo=timezone.utc)
 | 
				
			||||||
            if date_string.endswith('Z'):
 | 
					            if date_string.endswith('Z'):
 | 
				
			||||||
                return datetime.fromisoformat(date_string[:-1]).replace(tzinfo=timezone.utc)
 | 
					                return datetime.fromisoformat(date_string[:-1]).replace(tzinfo=timezone.utc)
 | 
				
			||||||
            elif '+' in date_string or date_string.endswith('UTC'):
 | 
					            elif '+' in date_string or date_string.endswith('UTC'):
 | 
				
			||||||
 | 
				
			|||||||
@ -8,3 +8,4 @@ dnspython
 | 
				
			|||||||
gunicorn
 | 
					gunicorn
 | 
				
			||||||
redis
 | 
					redis
 | 
				
			||||||
python-dotenv
 | 
					python-dotenv
 | 
				
			||||||
 | 
					psycopg2-binary
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user