adjustments

This commit is contained in:
overcuriousity 2025-09-09 16:10:22 +02:00
parent c105ebbb4b
commit cd80d6f569
5 changed files with 260 additions and 76 deletions

View File

@ -208,9 +208,6 @@ class CertificateChecker:
# Track connection failures # Track connection failures
if isinstance(e, requests.exceptions.ConnectionError): if isinstance(e, requests.exceptions.ConnectionError):
self.connection_failures += 1 self.connection_failures += 1
if self.connection_failures >= self.max_connection_failures:
logger.error(f"❌ Too many connection failures to crt.sh. Disabling certificate lookups.")
return certificates
if attempt < max_retries - 1: if attempt < max_retries - 1:
time.sleep(backoff_delays[attempt]) time.sleep(backoff_delays[attempt])

View File

@ -1,5 +1,5 @@
# File: src/dns_resolver.py # File: src/dns_resolver.py
"""DNS resolution functionality.""" """DNS resolution functionality with enhanced TLD testing."""
import dns.resolver import dns.resolver
import dns.reversename import dns.reversename
@ -16,7 +16,7 @@ from .config import Config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class DNSResolver: class DNSResolver:
"""DNS resolution and record lookup.""" """DNS resolution and record lookup with optimized TLD testing."""
# All DNS record types to query # All DNS record types to query
RECORD_TYPES = [ RECORD_TYPES = [
@ -48,8 +48,40 @@ class DNSResolver:
self.last_request = time.time() self.last_request = time.time()
self.query_count += 1 self.query_count += 1
def resolve_hostname_fast(self, hostname: str) -> List[str]:
    """Resolve *hostname* to IPv4 addresses with a single short-timeout query.

    Optimized for bulk TLD probing: only the primary configured DNS server
    is consulted, only A records are requested, and both the per-try timeout
    and the total query lifetime are capped at 2 seconds.  Every failure is
    treated as "no result" and reported at debug level only.

    Returns a (possibly empty) list of IP address strings.
    """
    logger.debug(f"🚀 Fast resolving hostname: {hostname}")

    # Dedicated resolver instance: primary nameserver only, tight limits.
    fast_resolver = dns.resolver.Resolver()
    fast_resolver.nameservers = [self.config.DNS_SERVERS[0]]
    fast_resolver.timeout = 2
    fast_resolver.lifetime = 2

    resolved: List[str] = []
    try:
        # A records only — the common case, and the cheapest to query.
        for record in fast_resolver.resolve(hostname, 'A'):
            resolved.append(str(record))
            logger.debug(f"⚡ Fast A record for {hostname}: {record}")
    except dns.resolver.NXDOMAIN:
        logger.debug(f"❌ NXDOMAIN for {hostname}")
    except dns.resolver.NoAnswer:
        logger.debug(f"⚠️ No A record for {hostname}")
    except dns.resolver.Timeout:
        logger.debug(f"⏱️ Timeout for {hostname}")
    except Exception as e:
        # Best-effort lookup: any other failure is deliberately swallowed.
        logger.debug(f"⚠️ Error fast resolving {hostname}: {e}")

    if resolved:
        logger.debug(f"⚡ Fast resolved {hostname} to {len(resolved)} IPs: {resolved}")
    return resolved
def resolve_hostname(self, hostname: str) -> List[str]: def resolve_hostname(self, hostname: str) -> List[str]:
"""Resolve hostname to IP addresses.""" """Resolve hostname to IP addresses (full resolution with retries)."""
ips = [] ips = []
logger.debug(f"🔍 Resolving hostname: {hostname}") logger.debug(f"🔍 Resolving hostname: {hostname}")

View File

@ -1,11 +1,11 @@
# File: src/reconnaissance.py # File: src/reconnaissance.py
"""Main reconnaissance logic.""" """Main reconnaissance logic with enhanced TLD expansion."""
import threading import threading
import concurrent.futures import concurrent.futures
import logging import logging
from datetime import datetime from datetime import datetime
from typing import Set, List, Optional from typing import Set, List, Optional, Tuple
from .data_structures import ReconData from .data_structures import ReconData
from .config import Config from .config import Config
from .dns_resolver import DNSResolver from .dns_resolver import DNSResolver
@ -18,7 +18,7 @@ from .tld_fetcher import TLDFetcher
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ReconnaissanceEngine: class ReconnaissanceEngine:
"""Main reconnaissance engine.""" """Main reconnaissance engine with smart TLD expansion."""
def __init__(self, config: Config): def __init__(self, config: Config):
self.config = config self.config = config
@ -91,7 +91,7 @@ class ReconnaissanceEngine:
else: else:
logger.info(f"🔍 Target '{target}' appears to be a hostname, expanding to all TLDs") logger.info(f"🔍 Target '{target}' appears to be a hostname, expanding to all TLDs")
self._update_progress(f"Expanding {target} to all TLDs", 5) self._update_progress(f"Expanding {target} to all TLDs", 5)
initial_targets = self._expand_hostname_to_tlds(target) initial_targets = self._expand_hostname_to_tlds_smart(target)
logger.info(f"📋 Found {len(initial_targets)} valid domains after TLD expansion") logger.info(f"📋 Found {len(initial_targets)} valid domains after TLD expansion")
self._update_progress("Resolving initial targets", 10) self._update_progress("Resolving initial targets", 10)
@ -119,36 +119,115 @@ class ReconnaissanceEngine:
return self.data return self.data
def _expand_hostname_to_tlds_smart(self, hostname: str) -> Set[str]:
    """Expand a bare hostname across TLDs in three prioritized phases.

    Phase 1 always checks the high-probability TLDs in parallel.  Phase 2
    (normal TLDs) runs only when fewer than 5 domains have been found so
    far, and phase 3 (deprioritized TLDs) only when fewer than 2 have —
    later, lower-yield phases are skipped once enough matches exist.

    Returns the set of fully-qualified domain names that resolved.
    """
    # Thresholds gating the later (slower, lower-yield) phases.
    NORMAL_PHASE_THRESHOLD = 5
    DEPRIORITIZED_PHASE_THRESHOLD = 2

    logger.info(f"🌐 Starting smart TLD expansion for hostname: {hostname}")

    # Get prioritized TLD lists
    priority_tlds, normal_tlds, deprioritized_tlds = self.tld_fetcher.get_prioritized_tlds()
    logger.info(f"📊 TLD categories: {len(priority_tlds)} priority, "
                f"{len(normal_tlds)} normal, {len(deprioritized_tlds)} deprioritized")

    valid_domains = set()

    # Phase 1: Check priority TLDs first (parallel processing)
    logger.info("🚀 Phase 1: Checking priority TLDs...")
    priority_results = self._check_tlds_parallel(hostname, priority_tlds, "priority")
    valid_domains.update(priority_results)
    self._update_progress(f"Phase 1 complete: {len(priority_results)} priority TLD matches", 6)

    # Phase 2: Check normal TLDs (only if phase 1 yielded few results)
    if len(valid_domains) < NORMAL_PHASE_THRESHOLD:
        logger.info("🔍 Phase 2: Checking normal TLDs...")
        normal_results = self._check_tlds_parallel(hostname, normal_tlds, "normal")
        valid_domains.update(normal_results)
        self._update_progress(f"Phase 2 complete: {len(normal_results)} normal TLD matches", 8)
    else:
        logger.info(f"⏭️ Skipping normal TLDs (found {len(valid_domains)} matches in priority)")

    # Phase 3: Check deprioritized TLDs only if we found very few results
    if len(valid_domains) < DEPRIORITIZED_PHASE_THRESHOLD:
        logger.info("🔍 Phase 3: Checking deprioritized TLDs (limited results so far)...")
        depri_results = self._check_tlds_parallel(hostname, deprioritized_tlds, "deprioritized")
        valid_domains.update(depri_results)
        self._update_progress(f"Phase 3 complete: {len(depri_results)} deprioritized TLD matches", 9)
    else:
        logger.info(f"⏭️ Skipping deprioritized TLDs (found {len(valid_domains)} matches already)")

    logger.info(f"🎯 Smart TLD expansion complete: found {len(valid_domains)} valid domains")
    return valid_domains
def _check_tlds_parallel(self, hostname: str, tlds: List[str], phase_name: str) -> Set[str]:
    """Resolve ``hostname`` against every TLD in *tlds* with a thread pool.

    Each ``hostname.tld`` candidate is probed via _check_single_tld (fast,
    short-timeout resolution).  Hits are recorded in self.data — the full
    hostname at depth 0 plus every resolved IP — and returned as a set of
    full hostnames.  *phase_name* is used only in log messages.
    """
    valid_domains: Set[str] = set()

    # Guard: ThreadPoolExecutor raises ValueError for max_workers == 0.
    if not tlds:
        logger.info(f"📊 {phase_name.title()} phase complete: no TLDs to test")
        return valid_domains

    tested_count = 0

    # Cap concurrency so we do not flood the resolver with requests.
    max_workers = min(20, len(tlds))
    logger.info(f"⚡ Starting parallel check of {len(tlds)} {phase_name} TLDs "
                f"with {max_workers} workers")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks
        future_to_tld = {
            executor.submit(self._check_single_tld, hostname, tld): tld
            for tld in tlds
        }

        # Process results as they complete
        for future in concurrent.futures.as_completed(future_to_tld):
            tld = future_to_tld[future]
            tested_count += 1

            try:
                result = future.result(timeout=10)  # 10 second timeout per future
                if result:
                    full_hostname, ips = result
                    logger.info(f"✅ Valid domain found: {full_hostname} -> {ips}")
                    self.data.add_hostname(full_hostname, 0)
                    valid_domains.add(full_hostname)
                    for ip in ips:
                        self.data.add_ip_address(ip)

                # Progress update every 50 TLDs in this phase
                if tested_count % 50 == 0:
                    logger.info(f"📊 {phase_name.title()} phase progress: "
                                f"{tested_count}/{len(tlds)} tested, "
                                f"{len(valid_domains)} found")
            except concurrent.futures.TimeoutError:
                logger.debug(f"⏱️ Timeout checking {hostname}.{tld}")
            except Exception as e:
                logger.debug(f"⚠️ Error checking {hostname}.{tld}: {e}")

    # NOTE: a wildcard counter used to be reported here, but the backing set
    # was never populated, so the log always claimed 0 wildcards — removed.
    logger.info(f"📊 {phase_name.title()} phase complete: "
                f"tested {tested_count} TLDs, found {len(valid_domains)} valid domains")
    return valid_domains
def _check_single_tld(self, hostname: str, tld: str) -> Optional[Tuple[str, List[str]]]:
    """Probe a single ``hostname.tld`` candidate with the fast resolver.

    Returns ``(full_hostname, ips)`` when the candidate resolves,
    otherwise ``None``.
    """
    candidate = f"{hostname}.{tld}"

    # Short-timeout, primary-server-only resolution for TLD testing.
    ips = self.dns_resolver.resolve_hostname_fast(candidate)
    if not ips:
        return None

    logger.debug(f"{candidate} -> {ips}")
    return (candidate, ips)
def _process_targets_recursively(self, targets: Set[str]): def _process_targets_recursively(self, targets: Set[str]):
"""Process targets with recursive subdomain discovery.""" """Process targets with recursive subdomain discovery."""
@ -161,7 +240,7 @@ class ReconnaissanceEngine:
new_targets = set() new_targets = set()
for target in targets: for target in targets:
logger.debug(f"🔍 Processing target: {target}") logger.debug(f"🎯 Processing target: {target}")
# DNS resolution and record gathering # DNS resolution and record gathering
self._process_single_target(target, current_depth) self._process_single_target(target, current_depth)
@ -223,7 +302,7 @@ class ReconnaissanceEngine:
self.data.certificates[hostname] self.data.certificates[hostname]
) )
new_subdomains.update(cert_subdomains) new_subdomains.update(cert_subdomains)
logger.debug(f"🔐 Extracted {len(cert_subdomains)} subdomains from certificates of {hostname}") logger.debug(f"🔍 Extracted {len(cert_subdomains)} subdomains from certificates of {hostname}")
# Filter out already known hostnames # Filter out already known hostnames
filtered_subdomains = new_subdomains - self.data.hostnames filtered_subdomains = new_subdomains - self.data.hostnames
@ -314,3 +393,8 @@ class ReconnaissanceEngine:
'virustotal_results': len(self.data.virustotal_results) 'virustotal_results': len(self.data.virustotal_results)
} }
logger.info(f"📊 External lookups summary: {ext_stats}") logger.info(f"📊 External lookups summary: {ext_stats}")
def _expand_hostname_to_tlds(self, hostname: str) -> Set[str]:
    """Legacy entry point kept for backward compatibility.

    Delegates to the smart, phased expansion.
    """
    return self._expand_hostname_to_tlds_smart(hostname)

View File

@ -1,9 +1,9 @@
# File: src/tld_fetcher.py # File: src/tld_fetcher.py
"""Fetch and cache IANA TLD list.""" """Fetch and cache IANA TLD list with smart prioritization."""
import requests import requests
import logging import logging
from typing import List, Set, Optional from typing import List, Set, Optional, Tuple
import os import os
import time import time
@ -11,15 +11,43 @@ import time
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class TLDFetcher: class TLDFetcher:
"""Fetches and caches IANA TLD list.""" """Fetches and caches IANA TLD list with smart prioritization."""
IANA_TLD_URL = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" IANA_TLD_URL = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
CACHE_FILE = "tlds_cache.txt" CACHE_FILE = "tlds_cache.txt"
CACHE_DURATION = 86400 # 24 hours in seconds CACHE_DURATION = 86400 # 24 hours in seconds
# Common TLDs that should be checked first (high success rate).
# Checked via set membership, so lookups are O(1).
PRIORITY_TLDS = {
    # Generic top-level domains (most common)
    'com', 'org', 'net', 'edu', 'gov', 'mil', 'int', 'info', 'biz', 'name',
    'io', 'co', 'me', 'tv', 'cc', 'ly', 'to', 'us', 'uk', 'ca',
    # Major country codes (high usage)
    'de', 'fr', 'it', 'es', 'nl', 'be', 'ch', 'at', 'se', 'no', 'dk', 'fi',
    'au', 'nz', 'jp', 'kr', 'cn', 'hk', 'sg', 'my', 'th', 'in', 'br', 'mx',
    'ru', 'pl', 'cz', 'hu', 'ro', 'bg', 'hr', 'si', 'sk', 'lt', 'lv', 'ee',
    'ie', 'pt', 'gr', 'cy', 'mt', 'lu', 'is', 'tr', 'il', 'za', 'ng', 'eg',
    # Popular new gTLDs (established, not spam-prone)
    'app', 'dev', 'tech', 'blog', 'news', 'shop', 'store', 'cloud', 'digital',
    'website', 'site', 'online', 'world', 'global', 'international'
}

# TLDs to deprioritize (often have wildcard DNS or low-quality domains).
# NOTE(review): 'xn--' is a name *prefix* (punycode/IDN labels); every other
# entry is a complete TLD name.  Consumers must take care not to
# substring-match these against longer TLDs (e.g. 'win' vs 'wine').
DEPRIORITIZED_PATTERNS = [
    'xn--',  # Internationalized domain names (often less common)
    # These TLDs are known for high wildcard/parking rates
    'tk', 'ml', 'ga', 'cf',  # Free TLDs often misused
    'top', 'win', 'download', 'stream', 'science', 'click', 'link',
    'loan', 'men', 'racing', 'review', 'party', 'trade', 'date',
    'cricket', 'accountant', 'faith', 'gdn', 'realtor'
]
def __init__(self):
    """Initialize the fetcher with empty lazy caches."""
    # Raw TLD set — populated on the first get_tlds() call.
    self._tlds: Optional[Set[str]] = None
    # (priority, normal, deprioritized) lists — populated on the first
    # get_prioritized_tlds() call.
    self._prioritized_tlds: Optional[Tuple[List[str], List[str], List[str]]] = None
    logger.info("🌐 TLD fetcher initialized with smart prioritization")
def get_tlds(self) -> Set[str]: def get_tlds(self) -> Set[str]:
"""Get list of TLDs, using cache if available.""" """Get list of TLDs, using cache if available."""
@ -29,6 +57,40 @@ class TLDFetcher:
logger.info(f"✅ Loaded {len(self._tlds)} TLDs") logger.info(f"✅ Loaded {len(self._tlds)} TLDs")
return self._tlds return self._tlds
def get_prioritized_tlds(self) -> Tuple[List[str], List[str], List[str]]:
    """Get TLDs sorted by priority: (priority, normal, deprioritized).

    Categorization rules:
      * priority       — exact membership in PRIORITY_TLDS
      * deprioritized  — the 'xn--' (punycode/IDN) prefix, or an exact
                         match against a deprioritized TLD name
      * normal         — everything else

    Bug fix: the previous implementation substring-matched against
    DEPRIORITIZED_PATTERNS, which wrongly demoted unrelated TLDs that
    merely contain a pattern (e.g. 'wine' contains 'win', 'management'
    contains 'men').  Matching is now prefix-for-'xn--' plus exact names.

    The three lists are alphabetically sorted and cached after the
    first call.
    """
    if self._prioritized_tlds is None:
        all_tlds = self.get_tlds()
        logger.debug("📊 Categorizing TLDs by priority...")

        # Exact-name set for O(1) lookups; 'xn--' is handled as a prefix.
        deprioritized_names = {p for p in self.DEPRIORITIZED_PATTERNS if p != 'xn--'}

        priority_list = []
        normal_list = []
        deprioritized_list = []

        for tld in all_tlds:
            tld_lower = tld.lower()
            if tld_lower in self.PRIORITY_TLDS:
                priority_list.append(tld_lower)
            elif tld_lower.startswith('xn--') or tld_lower in deprioritized_names:
                deprioritized_list.append(tld_lower)
            else:
                normal_list.append(tld_lower)

        # Sort each category alphabetically for consistency
        priority_list.sort()
        normal_list.sort()
        deprioritized_list.sort()

        self._prioritized_tlds = (priority_list, normal_list, deprioritized_list)

        logger.info(f"📊 TLD prioritization complete: "
                    f"{len(priority_list)} priority, "
                    f"{len(normal_list)} normal, "
                    f"{len(deprioritized_list)} deprioritized")

    return self._prioritized_tlds
def _load_tlds(self) -> Set[str]: def _load_tlds(self) -> Set[str]:
"""Load TLDs from cache or fetch from IANA.""" """Load TLDs from cache or fetch from IANA."""
if self._is_cache_valid(): if self._is_cache_valid():
@ -115,28 +177,37 @@ class TLDFetcher:
return self._get_fallback_tlds() return self._get_fallback_tlds()
def _get_fallback_tlds(self) -> Set[str]:
    """Return a minimal set of short TLDs if fetch fails.

    Used when neither the cache nor the IANA download is available.
    Contains only short, well-established TLDs: the ISO 3166 two-letter
    country codes plus the original seven three-letter gTLDs.
    """
    logger.warning("⚠️ Using fallback TLD list")
    # Use only short, well-established TLDs as fallback
    fallback_tlds = {
        # 2-character TLDs (country codes - most established)
        'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'ao', 'aq', 'ar', 'as', 'at',
        'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi',
        'bj', 'bl', 'bm', 'bn', 'bo', 'bq', 'br', 'bs', 'bt', 'bv', 'bw', 'by',
        'bz', 'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn',
        'co', 'cr', 'cu', 'cv', 'cw', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm',
        'do', 'dz', 'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj',
        'fk', 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi',
        'gl', 'gm', 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk',
        'hm', 'hn', 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq',
        'ir', 'is', 'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km',
        'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr',
        'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mf', 'mg', 'mh',
        'mk', 'ml', 'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv',
        'mw', 'mx', 'my', 'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no',
        'np', 'nr', 'nu', 'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl',
        'pm', 'pn', 'pr', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru',
        'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl',
        'sm', 'sn', 'so', 'sr', 'ss', 'st', 'sv', 'sx', 'sy', 'sz', 'tc', 'td',
        'tf', 'tg', 'th', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv',
        'tw', 'tz', 'ua', 'ug', 'uk', 'um', 'us', 'uy', 'uz', 'va', 'vc', 've',
        'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'ye', 'yt', 'za', 'zm', 'zw',
        # 3-character TLDs (generic - most common)
        'com', 'org', 'net', 'edu', 'gov', 'mil', 'int'
    }
    logger.info(f"📋 Using {len(fallback_tlds)} fallback TLDs (≤3 characters)")
    return fallback_tlds

View File

@ -319,7 +319,7 @@ class ReconTool {
const hostnameList = document.querySelector('#recentHostnames .hostname-list'); const hostnameList = document.querySelector('#recentHostnames .hostname-list');
if (hostnameList && data.hostnames && data.hostnames.length > 0) { if (hostnameList && data.hostnames && data.hostnames.length > 0) {
// Show last 10 hostnames // Show last 10 hostnames
const recentHostnames = data.hostnames.slice(-10); const recentHostnames = data.hostnames;
hostnameList.innerHTML = recentHostnames.map(hostname => hostnameList.innerHTML = recentHostnames.map(hostname =>
`<span class="discovery-item">${hostname}</span>` `<span class="discovery-item">${hostname}</span>`
).join(''); ).join('');
@ -332,7 +332,7 @@ class ReconTool {
const ipList = document.querySelector('#recentIPs .ip-list'); const ipList = document.querySelector('#recentIPs .ip-list');
if (ipList && data.ip_addresses && data.ip_addresses.length > 0) { if (ipList && data.ip_addresses && data.ip_addresses.length > 0) {
// Show last 10 IPs // Show last 10 IPs
const recentIPs = data.ip_addresses.slice(-10); const recentIPs = data.ip_addresses;
ipList.innerHTML = recentIPs.map(ip => ipList.innerHTML = recentIPs.map(ip =>
`<span class="discovery-item">${ip}</span>` `<span class="discovery-item">${ip}</span>`
).join(''); ).join('');