fixes to hint for incomplete data
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Set
|
||||
from typing import List, Dict, Any, Set, Optional
|
||||
from urllib.parse import quote
|
||||
from datetime import datetime, timezone
|
||||
import requests
|
||||
@@ -285,6 +285,17 @@ class CrtShProvider(BaseProvider):
|
||||
if self._stop_event and self._stop_event.is_set():
|
||||
self.logger.logger.info(f"CrtSh processing cancelled before processing for domain: {query_domain}")
|
||||
return result
|
||||
|
||||
incompleteness_warning = self._check_for_incomplete_data(query_domain, certificates)
|
||||
if incompleteness_warning:
|
||||
result.add_attribute(
|
||||
target_node=query_domain,
|
||||
name="crtsh_data_warning",
|
||||
value=incompleteness_warning,
|
||||
attr_type='metadata',
|
||||
provider=self.name,
|
||||
confidence=1.0
|
||||
)
|
||||
|
||||
all_discovered_domains = set()
|
||||
processed_issuers = set()
|
||||
@@ -577,4 +588,30 @@ class CrtShProvider(BaseProvider):
|
||||
elif query_domain.endswith(f'.{cert_domain}'):
|
||||
return 'parent_domain'
|
||||
else:
|
||||
return 'related_domain'
|
||||
return 'related_domain'
|
||||
|
||||
def _check_for_incomplete_data(self, domain: str, certificates: List[Dict[str, Any]]) -> Optional[str]:
    """
    Heuristically detect whether the certificate list from crt.sh looks incomplete.

    Two checks are applied in order:

    1. A list of 10000 or more entries is reported as likely truncated.
    2. For lists of more than 1000 entries, if the newest ``not_after`` date
       that could be parsed lies more than 365 days in the past, the data is
       reported as suspected incomplete.

    Args:
        domain: The queried domain. Not used by the current heuristics.
        certificates: Certificate records; each entry is read with
            ``.get('not_after')`` and handed to ``self._parse_certificate_date``.

    Returns:
        A human-readable warning string, or None when neither heuristic fires.
    """
    cert_count = len(certificates)

    # Heuristic 1: Check if the number of certs hits a known hard limit.
    if cert_count >= 10000:
        return f"Result likely truncated; received {cert_count} certificates, which may be the maximum limit."

    # Heuristic 2: Check if all returned certificates are old.
    # Only apply this for a reasonable number of certs.
    if cert_count <= 1000:
        return None

    latest_expiry = None
    for cert in certificates:
        # Keep the try body limited to the one call that is expected to fail
        # on missing or malformed dates.
        try:
            not_after = self._parse_certificate_date(cert.get('not_after'))
        except (ValueError, TypeError):
            continue

        # Ignore anything that is not a usable timestamp (e.g. a None result).
        if not isinstance(not_after, datetime):
            continue

        # Never mix naive and aware datetimes: comparing or subtracting them
        # raises TypeError, which previously could escape from the final age
        # check below.
        # NOTE(review): a naive value is assumed to be UTC -- confirm against
        # _parse_certificate_date.
        if not_after.utcoffset() is None:
            not_after = not_after.replace(tzinfo=timezone.utc)

        if latest_expiry is None or not_after > latest_expiry:
            latest_expiry = not_after

    if latest_expiry is None:
        # No certificate carried a parseable expiry date; nothing to judge.
        return None

    if (datetime.now(timezone.utc) - latest_expiry).days > 365:
        return f"Incomplete data suspected: The latest certificate expired more than a year ago ({latest_expiry.strftime('%Y-%m-%d')})."

    return None
|
||||
Reference in New Issue
Block a user