dnsrecon/src/tld_fetcher.py
overcuriousity 0c9cf00a3b progress
2025-09-09 14:54:02 +02:00

142 lines
5.6 KiB
Python

# File: src/tld_fetcher.py
"""Fetch and cache IANA TLD list."""
import requests
import logging
from typing import List, Set, Optional
import os
import time
# Module logger
logger = logging.getLogger(__name__)
class TLDFetcher:
    """Fetch the IANA top-level-domain list and keep an on-disk cache of it.

    The list is resolved lazily on the first :meth:`get_tlds` call and then
    memoized on the instance. Sources are tried in this order:

    1. the cache file, if it is younger than ``CACHE_DURATION`` and non-empty;
    2. a fresh download from ``IANA_TLD_URL`` (which also rewrites the cache);
    3. the cache file regardless of age, if the download failed;
    4. a small hard-coded fallback list.

    ``CACHE_FILE`` is a relative path, so it is resolved against the current
    working directory of the process.
    """

    IANA_TLD_URL = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
    CACHE_FILE = "tlds_cache.txt"
    CACHE_DURATION = 86400  # 24 hours in seconds

    def __init__(self):
        """Create a fetcher; no file or network access happens here."""
        # None means "not loaded yet"; filled in by the first get_tlds() call.
        self._tlds: Optional[Set[str]] = None
        logger.info("🌐 TLD fetcher initialized")

    def get_tlds(self) -> Set[str]:
        """Return the set of lower-cased TLDs, loading it on first use.

        The same set object is returned on every call, so callers should
        treat it as read-only.
        """
        if self._tlds is None:
            logger.debug("🔍 Loading TLD list...")
            self._tlds = self._load_tlds()
            logger.info(f"✅ Loaded {len(self._tlds)} TLDs")
        return self._tlds

    def _load_tlds(self) -> Set[str]:
        """Load TLDs from a fresh cache, otherwise download them."""
        if self._is_cache_valid():
            logger.debug("📂 Loading TLDs from cache")
            return self._load_from_cache()
        logger.info("🌐 Fetching fresh TLD list from IANA")
        return self._fetch_and_cache()

    def _is_cache_valid(self) -> bool:
        """Return True if the cache file exists and is younger than CACHE_DURATION."""
        # Ask for the mtime directly instead of exists()+getmtime(): the file
        # can disappear between the two calls.
        try:
            modified_at = os.path.getmtime(self.CACHE_FILE)
        except OSError:
            logger.debug("❌ TLD cache file does not exist")
            return False

        cache_age = time.time() - modified_at
        is_valid = cache_age < self.CACHE_DURATION
        if is_valid:
            logger.debug(f"✅ TLD cache is valid (age: {cache_age/3600:.1f} hours)")
        else:
            logger.debug(f"❌ TLD cache is expired (age: {cache_age/3600:.1f} hours)")
        return is_valid

    @staticmethod
    def _parse_tld_lines(text: str) -> List[str]:
        """Extract TLD entries from the text of a TLD list.

        Each line is stripped and lower-cased; blank lines and lines starting
        with ``#`` are skipped. Entries are returned in file order and
        duplicates are kept, so ``len()`` of the result is the number of data
        lines seen.
        """
        cleaned = (raw.strip().lower() for raw in text.splitlines())
        return [entry for entry in cleaned if entry and not entry.startswith('#')]

    def _read_cache(self) -> Set[str]:
        """Parse the cache file into a set of TLDs.

        Raises:
            OSError: the file cannot be opened or read.
            UnicodeError: the file is not valid UTF-8.
        """
        with open(self.CACHE_FILE, 'r', encoding='utf-8') as f:
            return set(self._parse_tld_lines(f.read()))

    def _load_from_cache(self) -> Set[str]:
        """Load TLDs from the cache file, downloading instead if it is unusable.

        An unreadable cache and a cache that contains no entries are both
        treated as "no cache" and trigger a fresh download.
        """
        try:
            tlds = self._read_cache()
        except (OSError, UnicodeError) as e:
            logger.error(f"❌ Error loading TLD cache: {e}")
            # Fall back to fetching fresh data
            return self._fetch_and_cache()

        if not tlds:
            # A recent-but-empty file (e.g. truncated write) must not pin an
            # empty TLD list for the whole cache lifetime.
            logger.warning("⚠️ TLD cache contains no entries, fetching fresh data")
            return self._fetch_and_cache()

        logger.info(f"📂 Loaded {len(tlds)} TLDs from cache")
        return tlds

    def _fetch_and_cache(self) -> Set[str]:
        """Download the TLD list from IANA and cache the raw response text.

        If the download fails, or the response contains no TLD entries, the
        result of :meth:`_get_offline_tlds` is returned and the cache file is
        left untouched.
        """
        text = self._download_tld_list()
        if text is None:
            return self._get_offline_tlds()

        entries = self._parse_tld_lines(text)
        tlds = set(entries)
        if not tlds:
            # Never cache or return an empty list: a 200 response with an
            # unexpected body would otherwise be trusted until the cache expires.
            logger.error("❌ IANA response contained no TLD entries")
            return self._get_offline_tlds()

        lines_processed = len(entries)
        logger.info(f"✅ Fetched {len(tlds)} TLDs from IANA (processed {lines_processed} lines)")
        self._write_cache(text)
        return tlds

    def _download_tld_list(self) -> Optional[str]:
        """Return the body of the IANA TLD list, or None if the request failed.

        Every failure is logged and swallowed so that callers can fall back
        to offline data instead of crashing.
        """
        try:
            logger.info(f"📡 Fetching TLD list from: {self.IANA_TLD_URL}")
            response = requests.get(
                self.IANA_TLD_URL,
                timeout=30,
                headers={'User-Agent': 'DNS-Recon-Tool/1.0'}
            )
            response.raise_for_status()
            return response.text
        except requests.exceptions.Timeout:
            logger.error("⏱️ Timeout fetching TLD list from IANA")
        except requests.exceptions.RequestException as e:
            logger.error(f"🌐 Network error fetching TLD list: {e}")
        except Exception as e:
            # Deliberate catch-all: fetching is best-effort by design.
            logger.error(f"❌ Unexpected error fetching TLD list: {e}")
        return None

    def _write_cache(self, text: str) -> None:
        """Write *text* to the cache file; failures are logged, not raised.

        The text goes to a temporary sibling file that is then moved over
        CACHE_FILE with os.replace, so an interrupted write cannot leave a
        truncated cache file with a fresh modification time.
        """
        tmp_path = f"{self.CACHE_FILE}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                f.write(text)
            os.replace(tmp_path, self.CACHE_FILE)
            logger.info(f"💾 TLD list cached to {self.CACHE_FILE}")
        except (OSError, UnicodeError) as cache_error:
            logger.warning(f"⚠️ Could not cache TLD list: {cache_error}")
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # temp file was never created or is already gone

    def _get_offline_tlds(self) -> Set[str]:
        """Return the best TLD list available without network access.

        Tries the cache file regardless of its age; if it is missing,
        unreadable or empty, returns the hard-coded fallback list. This
        method never triggers a download, so it cannot recurse into
        :meth:`_fetch_and_cache`.
        """
        try:
            stale = self._read_cache()
        except (OSError, UnicodeError):
            stale = set()

        if stale:
            logger.warning(f"⚠️ Using expired TLD cache ({len(stale)} TLDs)")
            return stale
        return self._get_fallback_tlds()

    def _get_fallback_tlds(self) -> Set[str]:
        """Return a minimal hard-coded set of common TLDs.

        Unlike the parsed IANA list, this set also contains dotted
        second-level suffixes such as ``co.uk``.
        """
        logger.warning("⚠️ Using fallback TLD list")
        fallback_tlds = {
            # Generic top-level domains
            'com', 'org', 'net', 'edu', 'gov', 'mil', 'int', 'info', 'biz', 'name',
            # Country code top-level domains (major ones)
            'us', 'uk', 'de', 'fr', 'it', 'es', 'nl', 'be', 'ch', 'at', 'se', 'no',
            'dk', 'fi', 'pl', 'cz', 'hu', 'ro', 'bg', 'hr', 'si', 'sk', 'lt', 'lv',
            'ee', 'ie', 'pt', 'gr', 'cy', 'mt', 'lu', 'is', 'li', 'ad', 'mc', 'sm',
            'va', 'by', 'ua', 'md', 'ru', 'kz', 'kg', 'tj', 'tm', 'uz', 'am', 'az',
            'ge', 'tr', 'il', 'jo', 'lb', 'sy', 'iq', 'ir', 'af', 'pk', 'in', 'lk',
            'mv', 'bt', 'bd', 'np', 'mm', 'th', 'la', 'kh', 'vn', 'my', 'sg', 'bn',
            'id', 'tl', 'ph', 'tw', 'hk', 'mo', 'cn', 'kp', 'kr', 'jp', 'mn',
            # Common compound TLDs
            'co.uk', 'org.uk', 'ac.uk', 'gov.uk', 'com.au', 'org.au', 'net.au',
            'gov.au', 'edu.au', 'co.za', 'org.za', 'net.za', 'gov.za', 'ac.za',
            'co.nz', 'org.nz', 'net.nz', 'govt.nz', 'ac.nz', 'co.jp', 'or.jp',
            'ne.jp', 'go.jp', 'ac.jp', 'ad.jp', 'ed.jp', 'gr.jp', 'lg.jp'
        }
        logger.info(f"📋 Using {len(fallback_tlds)} fallback TLDs")
        return fallback_tlds