# File: src/data_structures.py
"""Data structures for storing reconnaissance results."""

from dataclasses import dataclass, field
from typing import Dict, List, Set, Optional, Any
from datetime import datetime
import json
import logging

# Set up logging for this module
logger = logging.getLogger(__name__)


@dataclass
class DNSRecord:
    """DNS record information.

    Holds the record type, its value and an optional TTL.
    """

    record_type: str
    value: str
    ttl: Optional[int] = None

    def to_dict(self) -> dict:
        """Return the record as a plain dictionary."""
        return {
            'record_type': self.record_type,
            'value': self.value,
            'ttl': self.ttl,
        }


@dataclass
class Certificate:
    """Certificate information from crt.sh."""

    id: int
    issuer: str
    subject: str
    not_before: datetime
    not_after: datetime
    is_wildcard: bool = False

    def to_dict(self) -> dict:
        """Return the certificate as a plain dictionary.

        The validity bounds are emitted as ISO 8601 strings, or ``None``
        when the corresponding attribute is falsy.
        """
        return {
            'id': self.id,
            'issuer': self.issuer,
            'subject': self.subject,
            'not_before': self.not_before.isoformat() if self.not_before else None,
            'not_after': self.not_after.isoformat() if self.not_after else None,
            'is_wildcard': self.is_wildcard,
        }


@dataclass
class ShodanResult:
    """Shodan scan result."""

    ip: str
    ports: List[int]
    services: Dict[str, Any]
    organization: Optional[str] = None
    country: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary.

        ``ports`` and ``services`` are passed through by reference, not
        copied or converted.
        """
        return {
            'ip': self.ip,
            'ports': self.ports,
            'services': self.services,
            'organization': self.organization,
            'country': self.country,
        }


@dataclass
class VirusTotalResult:
    """VirusTotal scan result."""

    resource: str  # IP or domain
    positives: int
    total: int
    scan_date: datetime
    permalink: str

    def to_dict(self) -> dict:
        """Return the result as a plain dictionary.

        ``scan_date`` is emitted as an ISO 8601 string, or ``None`` when it
        is falsy.
        """
        return {
            'resource': self.resource,
            'positives': self.positives,
            'total': self.total,
            'scan_date': self.scan_date.isoformat() if self.scan_date else None,
            'permalink': self.permalink,
        }


@dataclass
class ReconData:
    """Main data structure for reconnaissance results."""

    # Core data
    hostnames: Set[str] = field(default_factory=set)
    ip_addresses: Set[str] = field(default_factory=set)

    # DNS information
    dns_records: Dict[str, List[DNSRecord]] = field(default_factory=dict)
    reverse_dns: Dict[str, str] = field(default_factory=dict)

    # Certificate information
    certificates: Dict[str, List[Certificate]] = field(default_factory=dict)

    # External service results
    shodan_results: Dict[str, ShodanResult] = field(default_factory=dict)
    virustotal_results: Dict[str, VirusTotalResult] = field(default_factory=dict)

    # Metadata
    start_time: datetime = field(default_factory=datetime.now)
    end_time: Optional[datetime] = None
    depth_map: Dict[str, int] = field(default_factory=dict)  # Track recursion depth

    def add_hostname(self, hostname: str, depth: int = 0) -> None:
        """Add a hostname to the dataset.

        The hostname is lower-cased before it is stored. When the hostname
        already has a recorded depth, the shallowest depth wins: a later
        rediscovery at a greater depth must not push the host past the
        ``max_depth`` limit applied by :meth:`get_new_subdomains`.

        Args:
            hostname: Hostname to record.
            depth: Recursion depth at which the hostname was found.
        """
        hostname = hostname.lower()
        known_depth = self.depth_map.get(hostname)
        self.hostnames.add(hostname)

        if known_depth is not None and known_depth <= depth:
            # Already tracked at an equal or shallower depth; keep it.
            logger.debug(
                "Hostname already known: %s (depth: %s)", hostname, known_depth
            )
            return

        self.depth_map[hostname] = depth
        logger.info("Added hostname: %s (depth: %s)", hostname, depth)

    def add_ip_address(self, ip: str) -> None:
        """Add an IP address to the dataset."""
        self.ip_addresses.add(ip)
        logger.info("Added IP address: %s", ip)

    def add_dns_record(self, hostname: str, record: DNSRecord) -> None:
        """Add a DNS record for a hostname.

        The hostname is lower-cased so the key matches the form stored by
        :meth:`add_hostname`.
        """
        hostname = hostname.lower()
        self.dns_records.setdefault(hostname, []).append(record)
        logger.debug(
            "Added DNS record for %s: %s -> %s",
            hostname, record.record_type, record.value,
        )

    def add_shodan_result(self, ip: str, result: ShodanResult) -> None:
        """Add Shodan result, replacing any previous result for ``ip``."""
        self.shodan_results[ip] = result
        logger.info(
            "Added Shodan result for %s: %s ports, org: %s",
            ip, len(result.ports), result.organization,
        )

    def add_virustotal_result(self, resource: str, result: VirusTotalResult) -> None:
        """Add VirusTotal result, replacing any previous one for ``resource``."""
        self.virustotal_results[resource] = result
        logger.info(
            "Added VirusTotal result for %s: %s/%s detections",
            resource, result.positives, result.total,
        )

    def get_new_subdomains(self, max_depth: int) -> Set[str]:
        """Get subdomains that haven't been processed yet and are within depth limit.

        A hostname counts as unprocessed while it has no entry in
        ``dns_records``. Hostnames missing from ``depth_map`` are treated
        as depth 0.

        NOTE(review): a hostname for which no DNS record is ever added keeps
        being returned here; callers need their own "already tried" tracking.

        Args:
            max_depth: Exclusive upper bound on the recorded depth.

        Returns:
            A new set of matching hostnames.
        """
        return {
            hostname
            for hostname in self.hostnames
            if hostname not in self.dns_records
            and self.depth_map.get(hostname, 0) < max_depth
        }

    def get_stats(self) -> Dict[str, int]:
        """Get current statistics.

        ``dns_records`` and ``certificates`` count individual entries across
        all hostnames; the other values count keys or set members.
        """
        return {
            'hostnames': len(self.hostnames),
            'ip_addresses': len(self.ip_addresses),
            'dns_records': sum(len(records) for records in self.dns_records.values()),
            'certificates': sum(len(certs) for certs in self.certificates.values()),
            'shodan_results': len(self.shodan_results),
            'virustotal_results': len(self.virustotal_results),
        }

    def to_dict(self) -> dict:
        """Export data as a serializable dictionary.

        Sets are emitted as sorted lists, nested records via their own
        ``to_dict`` methods and timestamps as ISO 8601 strings (``None``
        when unset).
        """
        # Only compute the stats for the debug line when it will be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Serializing ReconData with stats: %s", self.get_stats())

        result = {
            'hostnames': sorted(self.hostnames),
            'ip_addresses': sorted(self.ip_addresses),
            'dns_records': {
                host: [record.to_dict() for record in records]
                for host, records in self.dns_records.items()
            },
            'reverse_dns': dict(self.reverse_dns),
            'certificates': {
                host: [cert.to_dict() for cert in certs]
                for host, certs in self.certificates.items()
            },
            'shodan_results': {
                ip: shodan.to_dict()
                for ip, shodan in self.shodan_results.items()
            },
            'virustotal_results': {
                resource: vt_result.to_dict()
                for resource, vt_result in self.virustotal_results.items()
            },
            'depth_map': dict(self.depth_map),
            'metadata': {
                'start_time': self.start_time.isoformat() if self.start_time else None,
                'end_time': self.end_time.isoformat() if self.end_time else None,
                'stats': self.get_stats(),
            },
        }

        logger.info(
            "Serialized data contains: %s hostnames, "
            "%s IPs, %s Shodan results, "
            "%s VirusTotal results",
            len(result['hostnames']),
            len(result['ip_addresses']),
            len(result['shodan_results']),
            len(result['virustotal_results']),
        )
        return result

    def to_json(self) -> str:
        """Export data as JSON.

        Returns:
            The pretty-printed JSON document. If serialization fails, a
            minimal JSON object with ``error``, ``stats`` and ``timestamp``
            keys is returned instead of raising.
        """
        try:
            return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)
        except Exception as e:
            # Deliberate best-effort boundary: the caller always gets JSON
            # back; the traceback goes to the log.
            logger.exception("Failed to serialize to JSON: %s", e)
            # Return minimal JSON in case of error
            return json.dumps({
                'error': str(e),
                'stats': self.get_stats(),
                'timestamp': datetime.now().isoformat(),
            }, indent=2)