adjustments

This commit is contained in:
overcuriousity 2025-09-09 16:10:22 +02:00
parent c105ebbb4b
commit cd80d6f569
5 changed files with 260 additions and 76 deletions

View File

@ -208,9 +208,6 @@ class CertificateChecker:
# Track connection failures # Track connection failures
if isinstance(e, requests.exceptions.ConnectionError): if isinstance(e, requests.exceptions.ConnectionError):
self.connection_failures += 1 self.connection_failures += 1
if self.connection_failures >= self.max_connection_failures:
logger.error(f"❌ Too many connection failures to crt.sh. Disabling certificate lookups.")
return certificates
if attempt < max_retries - 1: if attempt < max_retries - 1:
time.sleep(backoff_delays[attempt]) time.sleep(backoff_delays[attempt])

View File

@ -1,5 +1,5 @@
# File: src/dns_resolver.py # File: src/dns_resolver.py
"""DNS resolution functionality.""" """DNS resolution functionality with enhanced TLD testing."""
import dns.resolver import dns.resolver
import dns.reversename import dns.reversename
@ -16,7 +16,7 @@ from .config import Config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class DNSResolver: class DNSResolver:
"""DNS resolution and record lookup.""" """DNS resolution and record lookup with optimized TLD testing."""
# All DNS record types to query # All DNS record types to query
RECORD_TYPES = [ RECORD_TYPES = [
@ -48,8 +48,40 @@ class DNSResolver:
self.last_request = time.time() self.last_request = time.time()
self.query_count += 1 self.query_count += 1
def resolve_hostname_fast(self, hostname: str) -> List[str]:
    """Resolve *hostname* to IPv4 addresses with a single short-timeout query.

    Optimized for bulk TLD probing: only the primary configured DNS server
    is consulted, only A records are requested, and both the per-try timeout
    and the total query lifetime are capped at 2 seconds.  Every failure is
    treated as "no result" and reported at debug level only.

    Returns a (possibly empty) list of IP address strings.
    """
    logger.debug(f"🚀 Fast resolving hostname: {hostname}")

    # Dedicated resolver instance: primary nameserver only, tight limits.
    fast_resolver = dns.resolver.Resolver()
    fast_resolver.nameservers = [self.config.DNS_SERVERS[0]]
    fast_resolver.timeout = 2
    fast_resolver.lifetime = 2

    resolved: List[str] = []
    try:
        # A records only — the common case, and the cheapest to query.
        for record in fast_resolver.resolve(hostname, 'A'):
            resolved.append(str(record))
            logger.debug(f"⚡ Fast A record for {hostname}: {record}")
    except dns.resolver.NXDOMAIN:
        logger.debug(f"❌ NXDOMAIN for {hostname}")
    except dns.resolver.NoAnswer:
        logger.debug(f"⚠️ No A record for {hostname}")
    except dns.resolver.Timeout:
        logger.debug(f"⏱️ Timeout for {hostname}")
    except Exception as e:
        # Best-effort lookup: any other failure is deliberately swallowed.
        logger.debug(f"⚠️ Error fast resolving {hostname}: {e}")

    if resolved:
        logger.debug(f"⚡ Fast resolved {hostname} to {len(resolved)} IPs: {resolved}")
    return resolved
def resolve_hostname(self, hostname: str) -> List[str]: def resolve_hostname(self, hostname: str) -> List[str]:
"""Resolve hostname to IP addresses.""" """Resolve hostname to IP addresses (full resolution with retries)."""
ips = [] ips = []
logger.debug(f"🔍 Resolving hostname: {hostname}") logger.debug(f"🔍 Resolving hostname: {hostname}")

View File

@ -1,11 +1,11 @@
# File: src/reconnaissance.py # File: src/reconnaissance.py
"""Main reconnaissance logic.""" """Main reconnaissance logic with enhanced TLD expansion."""
import threading import threading
import concurrent.futures import concurrent.futures
import logging import logging
from datetime import datetime from datetime import datetime
from typing import Set, List, Optional from typing import Set, List, Optional, Tuple
from .data_structures import ReconData from .data_structures import ReconData
from .config import Config from .config import Config
from .dns_resolver import DNSResolver from .dns_resolver import DNSResolver
@ -18,7 +18,7 @@ from .tld_fetcher import TLDFetcher
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ReconnaissanceEngine: class ReconnaissanceEngine:
"""Main reconnaissance engine.""" """Main reconnaissance engine with smart TLD expansion."""
def __init__(self, config: Config): def __init__(self, config: Config):
self.config = config self.config = config
@ -91,7 +91,7 @@ class ReconnaissanceEngine:
else: else:
logger.info(f"🔍 Target '{target}' appears to be a hostname, expanding to all TLDs") logger.info(f"🔍 Target '{target}' appears to be a hostname, expanding to all TLDs")
self._update_progress(f"Expanding {target} to all TLDs", 5) self._update_progress(f"Expanding {target} to all TLDs", 5)
initial_targets = self._expand_hostname_to_tlds(target) initial_targets = self._expand_hostname_to_tlds_smart(target)
logger.info(f"📋 Found {len(initial_targets)} valid domains after TLD expansion") logger.info(f"📋 Found {len(initial_targets)} valid domains after TLD expansion")
self._update_progress("Resolving initial targets", 10) self._update_progress("Resolving initial targets", 10)
@ -119,36 +119,115 @@ class ReconnaissanceEngine:
return self.data return self.data
def _expand_hostname_to_tlds_smart(self, hostname: str) -> Set[str]:
    """Expand a bare hostname across TLDs in three prioritized phases.

    Phase 1 always checks the high-probability TLDs in parallel.  Phase 2
    (normal TLDs) runs only when fewer than 5 domains have been found so
    far, and phase 3 (deprioritized TLDs) only when fewer than 2 have —
    later, lower-yield phases are skipped once enough matches exist.

    Returns the set of fully-qualified domain names that resolved.
    """
    # Thresholds gating the later (slower, lower-yield) phases.
    NORMAL_PHASE_THRESHOLD = 5
    DEPRIORITIZED_PHASE_THRESHOLD = 2

    logger.info(f"🌐 Starting smart TLD expansion for hostname: {hostname}")

    # Get prioritized TLD lists
    priority_tlds, normal_tlds, deprioritized_tlds = self.tld_fetcher.get_prioritized_tlds()
    logger.info(f"📊 TLD categories: {len(priority_tlds)} priority, "
                f"{len(normal_tlds)} normal, {len(deprioritized_tlds)} deprioritized")

    valid_domains = set()

    # Phase 1: Check priority TLDs first (parallel processing)
    logger.info("🚀 Phase 1: Checking priority TLDs...")
    priority_results = self._check_tlds_parallel(hostname, priority_tlds, "priority")
    valid_domains.update(priority_results)
    self._update_progress(f"Phase 1 complete: {len(priority_results)} priority TLD matches", 6)

    # Phase 2: Check normal TLDs (only if phase 1 yielded few results)
    if len(valid_domains) < NORMAL_PHASE_THRESHOLD:
        logger.info("🔍 Phase 2: Checking normal TLDs...")
        normal_results = self._check_tlds_parallel(hostname, normal_tlds, "normal")
        valid_domains.update(normal_results)
        self._update_progress(f"Phase 2 complete: {len(normal_results)} normal TLD matches", 8)
    else:
        logger.info(f"⏭️ Skipping normal TLDs (found {len(valid_domains)} matches in priority)")

    # Phase 3: Check deprioritized TLDs only if we found very few results
    if len(valid_domains) < DEPRIORITIZED_PHASE_THRESHOLD:
        logger.info("🔍 Phase 3: Checking deprioritized TLDs (limited results so far)...")
        depri_results = self._check_tlds_parallel(hostname, deprioritized_tlds, "deprioritized")
        valid_domains.update(depri_results)
        self._update_progress(f"Phase 3 complete: {len(depri_results)} deprioritized TLD matches", 9)
    else:
        logger.info(f"⏭️ Skipping deprioritized TLDs (found {len(valid_domains)} matches already)")

    logger.info(f"🎯 Smart TLD expansion complete: found {len(valid_domains)} valid domains")
    return valid_domains
def _check_tlds_parallel(self, hostname: str, tlds: List[str], phase_name: str) -> Set[str]:
    """Resolve ``hostname`` against every TLD in *tlds* with a thread pool.

    Each ``hostname.tld`` candidate is probed via _check_single_tld (fast,
    short-timeout resolution).  Hits are recorded in self.data — the full
    hostname at depth 0 plus every resolved IP — and returned as a set of
    full hostnames.  *phase_name* is used only in log messages.
    """
    valid_domains: Set[str] = set()

    # Guard: ThreadPoolExecutor raises ValueError for max_workers == 0.
    if not tlds:
        logger.info(f"📊 {phase_name.title()} phase complete: no TLDs to test")
        return valid_domains

    tested_count = 0

    # Cap concurrency so we do not flood the resolver with requests.
    max_workers = min(20, len(tlds))
    logger.info(f"⚡ Starting parallel check of {len(tlds)} {phase_name} TLDs "
                f"with {max_workers} workers")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks
        future_to_tld = {
            executor.submit(self._check_single_tld, hostname, tld): tld
            for tld in tlds
        }

        # Process results as they complete
        for future in concurrent.futures.as_completed(future_to_tld):
            tld = future_to_tld[future]
            tested_count += 1

            try:
                result = future.result(timeout=10)  # 10 second timeout per future
                if result:
                    full_hostname, ips = result
                    logger.info(f"✅ Valid domain found: {full_hostname} -> {ips}")
                    self.data.add_hostname(full_hostname, 0)
                    valid_domains.add(full_hostname)
                    for ip in ips:
                        self.data.add_ip_address(ip)

                # Progress update every 50 TLDs in this phase
                if tested_count % 50 == 0:
                    logger.info(f"📊 {phase_name.title()} phase progress: "
                                f"{tested_count}/{len(tlds)} tested, "
                                f"{len(valid_domains)} found")
            except concurrent.futures.TimeoutError:
                logger.debug(f"⏱️ Timeout checking {hostname}.{tld}")
            except Exception as e:
                logger.debug(f"⚠️ Error checking {hostname}.{tld}: {e}")

    # NOTE: a wildcard counter used to be reported here, but the backing set
    # was never populated, so the log always claimed 0 wildcards — removed.
    logger.info(f"📊 {phase_name.title()} phase complete: "
                f"tested {tested_count} TLDs, found {len(valid_domains)} valid domains")
    return valid_domains
def _check_single_tld(self, hostname: str, tld: str) -> Optional[Tuple[str, List[str]]]:
    """Probe a single ``hostname.tld`` candidate with the fast resolver.

    Returns ``(full_hostname, ips)`` when the candidate resolves,
    otherwise ``None``.
    """
    candidate = f"{hostname}.{tld}"

    # Short-timeout, primary-server-only resolution for TLD testing.
    ips = self.dns_resolver.resolve_hostname_fast(candidate)
    if not ips:
        return None

    logger.debug(f"{candidate} -> {ips}")
    return (candidate, ips)
def _process_targets_recursively(self, targets: Set[str]): def _process_targets_recursively(self, targets: Set[str]):
"""Process targets with recursive subdomain discovery.""" """Process targets with recursive subdomain discovery."""
@ -161,7 +240,7 @@ class ReconnaissanceEngine:
new_targets = set() new_targets = set()
for target in targets: for target in targets:
logger.debug(f"🔍 Processing target: {target}") logger.debug(f"🎯 Processing target: {target}")
# DNS resolution and record gathering # DNS resolution and record gathering
self._process_single_target(target, current_depth) self._process_single_target(target, current_depth)
@ -223,7 +302,7 @@ class ReconnaissanceEngine:
self.data.certificates[hostname] self.data.certificates[hostname]
) )
new_subdomains.update(cert_subdomains) new_subdomains.update(cert_subdomains)
logger.debug(f"🔐 Extracted {len(cert_subdomains)} subdomains from certificates of {hostname}") logger.debug(f"🔍 Extracted {len(cert_subdomains)} subdomains from certificates of {hostname}")
# Filter out already known hostnames # Filter out already known hostnames
filtered_subdomains = new_subdomains - self.data.hostnames filtered_subdomains = new_subdomains - self.data.hostnames
@ -314,3 +393,8 @@ class ReconnaissanceEngine:
'virustotal_results': len(self.data.virustotal_results) 'virustotal_results': len(self.data.virustotal_results)
} }
logger.info(f"📊 External lookups summary: {ext_stats}") logger.info(f"📊 External lookups summary: {ext_stats}")
def _expand_hostname_to_tlds(self, hostname: str) -> Set[str]:
    """Legacy entry point kept for backward compatibility.

    Delegates to the smart, phased expansion.
    """
    return self._expand_hostname_to_tlds_smart(hostname)

View File

@ -1,9 +1,9 @@
# File: src/tld_fetcher.py # File: src/tld_fetcher.py
"""Fetch and cache IANA TLD list.""" """Fetch and cache IANA TLD list with smart prioritization."""
import requests import requests
import logging import logging
from typing import List, Set, Optional from typing import List, Set, Optional, Tuple
import os import os
import time import time
@ -11,15 +11,43 @@ import time
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class TLDFetcher: class TLDFetcher:
"""Fetches and caches IANA TLD list.""" """Fetches and caches IANA TLD list with smart prioritization."""
IANA_TLD_URL = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" IANA_TLD_URL = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
CACHE_FILE = "tlds_cache.txt" CACHE_FILE = "tlds_cache.txt"
CACHE_DURATION = 86400 # 24 hours in seconds CACHE_DURATION = 86400 # 24 hours in seconds
# Common TLDs that should be checked first (high success rate).
# Checked via set membership, so lookups are O(1).
PRIORITY_TLDS = {
    # Generic top-level domains (most common)
    'com', 'org', 'net', 'edu', 'gov', 'mil', 'int', 'info', 'biz', 'name',
    'io', 'co', 'me', 'tv', 'cc', 'ly', 'to', 'us', 'uk', 'ca',
    # Major country codes (high usage)
    'de', 'fr', 'it', 'es', 'nl', 'be', 'ch', 'at', 'se', 'no', 'dk', 'fi',
    'au', 'nz', 'jp', 'kr', 'cn', 'hk', 'sg', 'my', 'th', 'in', 'br', 'mx',
    'ru', 'pl', 'cz', 'hu', 'ro', 'bg', 'hr', 'si', 'sk', 'lt', 'lv', 'ee',
    'ie', 'pt', 'gr', 'cy', 'mt', 'lu', 'is', 'tr', 'il', 'za', 'ng', 'eg',
    # Popular new gTLDs (established, not spam-prone)
    'app', 'dev', 'tech', 'blog', 'news', 'shop', 'store', 'cloud', 'digital',
    'website', 'site', 'online', 'world', 'global', 'international'
}

# TLDs to deprioritize (often have wildcard DNS or low-quality domains).
# NOTE(review): 'xn--' is a name *prefix* (punycode/IDN labels); every other
# entry is a complete TLD name.  Consumers must take care not to
# substring-match these against longer TLDs (e.g. 'win' vs 'wine').
DEPRIORITIZED_PATTERNS = [
    'xn--',  # Internationalized domain names (often less common)
    # These TLDs are known for high wildcard/parking rates
    'tk', 'ml', 'ga', 'cf',  # Free TLDs often misused
    'top', 'win', 'download', 'stream', 'science', 'click', 'link',
    'loan', 'men', 'racing', 'review', 'party', 'trade', 'date',
    'cricket', 'accountant', 'faith', 'gdn', 'realtor'
]
def __init__(self):
    """Initialize the fetcher with empty lazy caches."""
    # Raw TLD set — populated on the first get_tlds() call.
    self._tlds: Optional[Set[str]] = None
    # (priority, normal, deprioritized) lists — populated on the first
    # get_prioritized_tlds() call.
    self._prioritized_tlds: Optional[Tuple[List[str], List[str], List[str]]] = None
    logger.info("🌐 TLD fetcher initialized with smart prioritization")
def get_tlds(self) -> Set[str]: def get_tlds(self) -> Set[str]:
"""Get list of TLDs, using cache if available.""" """Get list of TLDs, using cache if available."""
@ -29,6 +57,40 @@ class TLDFetcher:
logger.info(f"✅ Loaded {len(self._tlds)} TLDs") logger.info(f"✅ Loaded {len(self._tlds)} TLDs")
return self._tlds return self._tlds
def get_prioritized_tlds(self) -> Tuple[List[str], List[str], List[str]]:
    """Get TLDs sorted by priority: (priority, normal, deprioritized).

    Categorization rules:
      * priority       — exact membership in PRIORITY_TLDS
      * deprioritized  — the 'xn--' (punycode/IDN) prefix, or an exact
                         match against a deprioritized TLD name
      * normal         — everything else

    Bug fix: the previous implementation substring-matched against
    DEPRIORITIZED_PATTERNS, which wrongly demoted unrelated TLDs that
    merely contain a pattern (e.g. 'wine' contains 'win', 'management'
    contains 'men').  Matching is now prefix-for-'xn--' plus exact names.

    The three lists are alphabetically sorted and cached after the
    first call.
    """
    if self._prioritized_tlds is None:
        all_tlds = self.get_tlds()
        logger.debug("📊 Categorizing TLDs by priority...")

        # Exact-name set for O(1) lookups; 'xn--' is handled as a prefix.
        deprioritized_names = {p for p in self.DEPRIORITIZED_PATTERNS if p != 'xn--'}

        priority_list = []
        normal_list = []
        deprioritized_list = []

        for tld in all_tlds:
            tld_lower = tld.lower()
            if tld_lower in self.PRIORITY_TLDS:
                priority_list.append(tld_lower)
            elif tld_lower.startswith('xn--') or tld_lower in deprioritized_names:
                deprioritized_list.append(tld_lower)
            else:
                normal_list.append(tld_lower)

        # Sort each category alphabetically for consistency
        priority_list.sort()
        normal_list.sort()
        deprioritized_list.sort()

        self._prioritized_tlds = (priority_list, normal_list, deprioritized_list)

        logger.info(f"📊 TLD prioritization complete: "
                    f"{len(priority_list)} priority, "
                    f"{len(normal_list)} normal, "
                    f"{len(deprioritized_list)} deprioritized")

    return self._prioritized_tlds
def _load_tlds(self) -> Set[str]: def _load_tlds(self) -> Set[str]:
"""Load TLDs from cache or fetch from IANA.""" """Load TLDs from cache or fetch from IANA."""
if self._is_cache_valid(): if self._is_cache_valid():
@ -115,28 +177,37 @@ class TLDFetcher:
return self._get_fallback_tlds() return self._get_fallback_tlds()
def _get_fallback_tlds(self) -> Set[str]:
    """Return a minimal set of short TLDs if fetch fails.

    Used when neither the cache nor the IANA download is available.
    Contains only short, well-established TLDs: the ISO 3166 two-letter
    country codes plus the original seven three-letter gTLDs.
    """
    logger.warning("⚠️ Using fallback TLD list")
    # Use only short, well-established TLDs as fallback
    fallback_tlds = {
        # 2-character TLDs (country codes - most established)
        'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'ao', 'aq', 'ar', 'as', 'at',
        'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi',
        'bj', 'bl', 'bm', 'bn', 'bo', 'bq', 'br', 'bs', 'bt', 'bv', 'bw', 'by',
        'bz', 'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn',
        'co', 'cr', 'cu', 'cv', 'cw', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm',
        'do', 'dz', 'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj',
        'fk', 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi',
        'gl', 'gm', 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk',
        'hm', 'hn', 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq',
        'ir', 'is', 'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km',
        'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr',
        'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mf', 'mg', 'mh',
        'mk', 'ml', 'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv',
        'mw', 'mx', 'my', 'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no',
        'np', 'nr', 'nu', 'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl',
        'pm', 'pn', 'pr', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru',
        'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl',
        'sm', 'sn', 'so', 'sr', 'ss', 'st', 'sv', 'sx', 'sy', 'sz', 'tc', 'td',
        'tf', 'tg', 'th', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv',
        'tw', 'tz', 'ua', 'ug', 'uk', 'um', 'us', 'uy', 'uz', 'va', 'vc', 've',
        'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'ye', 'yt', 'za', 'zm', 'zw',
        # 3-character TLDs (generic - most common)
        'com', 'org', 'net', 'edu', 'gov', 'mil', 'int'
    }
    logger.info(f"📋 Using {len(fallback_tlds)} fallback TLDs (≤3 characters)")
    return fallback_tlds

View File

@ -319,7 +319,7 @@ class ReconTool {
const hostnameList = document.querySelector('#recentHostnames .hostname-list'); const hostnameList = document.querySelector('#recentHostnames .hostname-list');
if (hostnameList && data.hostnames && data.hostnames.length > 0) { if (hostnameList && data.hostnames && data.hostnames.length > 0) {
// Show last 10 hostnames // Show last 10 hostnames
const recentHostnames = data.hostnames.slice(-10); const recentHostnames = data.hostnames;
hostnameList.innerHTML = recentHostnames.map(hostname => hostnameList.innerHTML = recentHostnames.map(hostname =>
`<span class="discovery-item">${hostname}</span>` `<span class="discovery-item">${hostname}</span>`
).join(''); ).join('');
@ -332,7 +332,7 @@ class ReconTool {
const ipList = document.querySelector('#recentIPs .ip-list'); const ipList = document.querySelector('#recentIPs .ip-list');
if (ipList && data.ip_addresses && data.ip_addresses.length > 0) { if (ipList && data.ip_addresses && data.ip_addresses.length > 0) {
// Show last 10 IPs // Show last 10 IPs
const recentIPs = data.ip_addresses.slice(-10); const recentIPs = data.ip_addresses;
ipList.innerHTML = recentIPs.map(ip => ipList.innerHTML = recentIPs.map(ip =>
`<span class="discovery-item">${ip}</span>` `<span class="discovery-item">${ip}</span>`
).join(''); ).join('');