204 lines
7.2 KiB
Python
204 lines
7.2 KiB
Python
# File: src/data_structures.py
|
|
"""Data structures for storing reconnaissance results."""
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, List, Set, Optional, Any
|
|
from datetime import datetime
|
|
import json
|
|
import logging
|
|
|
|
# Set up logging for this module
|
|
logger = logging.getLogger(__name__)
|
|
|
|
@dataclass
class DNSRecord:
    """DNS record information.

    Attributes:
        record_type: Record type label, stored as given.
        value: Record value, stored as given.
        ttl: Optional TTL; ``None`` when not supplied.
    """

    record_type: str
    value: str
    ttl: Optional[int] = None

    def to_dict(self) -> dict:
        """Return the record as a plain dict keyed by field name."""
        exported = {}
        # Keys are emitted in declaration order: record_type, value, ttl.
        for attr in ('record_type', 'value', 'ttl'):
            exported[attr] = getattr(self, attr)
        return exported
|
|
|
|
@dataclass
class Certificate:
    """Certificate information from crt.sh.

    Attributes:
        id: Certificate identifier.
        issuer: Issuer string.
        subject: Subject string.
        not_before: Start of the validity window.
        not_after: End of the validity window.
        is_wildcard: Wildcard flag; defaults to ``False``.
    """

    id: int
    issuer: str
    subject: str
    not_before: datetime
    not_after: datetime
    is_wildcard: bool = False

    def to_dict(self) -> dict:
        """Return the certificate as a dict with ISO-8601 timestamps.

        A falsy ``not_before`` / ``not_after`` (e.g. ``None``) is exported
        as ``None`` rather than formatted.
        """
        valid_from = self.not_before.isoformat() if self.not_before else None
        valid_until = self.not_after.isoformat() if self.not_after else None

        exported = {'id': self.id, 'issuer': self.issuer, 'subject': self.subject}
        exported['not_before'] = valid_from
        exported['not_after'] = valid_until
        exported['is_wildcard'] = self.is_wildcard
        return exported
|
|
|
|
@dataclass
class ShodanResult:
    """Shodan scan result.

    Attributes:
        ip: IP address the result belongs to.
        ports: Port numbers reported for the IP.
        services: Service details; values are free-form.
        organization: Optional organization name.
        country: Optional country.
    """

    ip: str
    ports: List[int]
    services: Dict[str, Any]
    organization: Optional[str] = None
    country: Optional[str] = None

    def to_dict(self) -> dict:
        """Return the result as a plain dict.

        ``ports`` and ``services`` are shallow-copied so that mutating the
        exported dict cannot alter this record. Nested values inside
        ``services`` are still shared. ``None`` is passed through unchanged.
        """
        ports = list(self.ports) if self.ports is not None else None
        services = dict(self.services) if self.services is not None else None
        return {
            'ip': self.ip,
            'ports': ports,
            'services': services,
            'organization': self.organization,
            'country': self.country,
        }
|
|
|
|
@dataclass
class VirusTotalResult:
    """VirusTotal scan result.

    Attributes:
        resource: The scanned resource (IP or domain).
        positives: Positive count.
        total: Total count.
        scan_date: When the scan was performed.
        permalink: Link to the report.
    """

    resource: str  # IP or domain
    positives: int
    total: int
    scan_date: datetime
    permalink: str

    def to_dict(self) -> dict:
        """Return the result as a dict with an ISO-8601 ``scan_date``.

        A falsy ``scan_date`` (e.g. ``None``) is exported as ``None``.
        """
        if self.scan_date:
            scanned_at = self.scan_date.isoformat()
        else:
            scanned_at = None

        exported = {
            'resource': self.resource,
            'positives': self.positives,
            'total': self.total,
        }
        exported['scan_date'] = scanned_at
        exported['permalink'] = self.permalink
        return exported
|
|
|
|
@dataclass
class ReconData:
    """Main data structure for reconnaissance results.

    Accumulates hostnames, IP addresses and the per-key results gathered
    for them, and exports everything through ``to_dict`` / ``to_json``.

    Sibling record types are annotated as string forward references, so the
    annotations are not evaluated when the class is created.
    """

    # Core data
    hostnames: Set[str] = field(default_factory=set)
    ip_addresses: Set[str] = field(default_factory=set)

    # DNS information
    dns_records: Dict[str, List["DNSRecord"]] = field(default_factory=dict)
    # presumably IP -> hostname; nothing in this class writes it -- verify against callers
    reverse_dns: Dict[str, str] = field(default_factory=dict)

    # Certificate information
    certificates: Dict[str, List["Certificate"]] = field(default_factory=dict)

    # External service results
    shodan_results: Dict[str, "ShodanResult"] = field(default_factory=dict)
    virustotal_results: Dict[str, "VirusTotalResult"] = field(default_factory=dict)

    # Metadata
    start_time: datetime = field(default_factory=datetime.now)
    end_time: Optional[datetime] = None
    depth_map: Dict[str, int] = field(default_factory=dict)  # Track recursion depth

    def add_hostname(self, hostname: str, depth: int = 0) -> None:
        """Add a hostname to the dataset.

        The hostname is lower-cased before it is stored. If it is already
        known, its recorded depth is only ever lowered: re-discovering a
        host deeper in the recursion must not push it past the depth limit
        used by ``get_new_subdomains``.

        Args:
            hostname: Hostname to record.
            depth: Recursion depth at which the hostname was found.
        """
        hostname = hostname.lower()
        self.hostnames.add(hostname)

        known_depth = self.depth_map.get(hostname)
        if known_depth is not None and known_depth <= depth:
            # Already tracked at the same or a shallower depth; keep it.
            logger.debug(f"Hostname already known: {hostname} (depth: {known_depth})")
            return

        self.depth_map[hostname] = depth
        logger.info(f"Added hostname: {hostname} (depth: {depth})")

    def add_ip_address(self, ip: str) -> None:
        """Add an IP address to the dataset."""
        self.ip_addresses.add(ip)
        logger.info(f"Added IP address: {ip}")

    def add_dns_record(self, hostname: str, record: "DNSRecord") -> None:
        """Add a DNS record for a hostname.

        The hostname is lower-cased so the key matches ``add_hostname``.
        """
        hostname = hostname.lower()
        self.dns_records.setdefault(hostname, []).append(record)
        logger.debug(f"Added DNS record for {hostname}: {record.record_type} -> {record.value}")

    def add_shodan_result(self, ip: str, result: "ShodanResult") -> None:
        """Add Shodan result, replacing any earlier result for the same IP."""
        self.shodan_results[ip] = result
        logger.info(f"Added Shodan result for {ip}: {len(result.ports)} ports, org: {result.organization}")

    def add_virustotal_result(self, resource: str, result: "VirusTotalResult") -> None:
        """Add VirusTotal result, replacing any earlier one for the resource."""
        self.virustotal_results[resource] = result
        logger.info(f"Added VirusTotal result for {resource}: {result.positives}/{result.total} detections")

    def get_new_subdomains(self, max_depth: int) -> Set[str]:
        """Get subdomains that haven't been processed yet and are within depth limit.

        A hostname counts as processed once it has an entry in
        ``dns_records``. Hostnames missing from ``depth_map`` are treated
        as depth 0.

        NOTE(review): a hostname for which no DNS record is ever added keeps
        being returned here -- confirm callers handle that.

        Args:
            max_depth: Exclusive upper bound on the recorded depth.

        Returns:
            The set of unprocessed hostnames with depth below ``max_depth``.
        """
        return {
            hostname
            for hostname in self.hostnames
            if hostname not in self.dns_records
            and self.depth_map.get(hostname, 0) < max_depth
        }

    def get_stats(self) -> Dict[str, int]:
        """Get current statistics.

        ``dns_records`` and ``certificates`` count individual entries across
        all keys; the other values count keys / set members.
        """
        return {
            'hostnames': len(self.hostnames),
            'ip_addresses': len(self.ip_addresses),
            'dns_records': sum(len(records) for records in self.dns_records.values()),
            'certificates': sum(len(certs) for certs in self.certificates.values()),
            'shodan_results': len(self.shodan_results),
            'virustotal_results': len(self.virustotal_results),
        }

    def to_dict(self) -> dict:
        """Export data as a serializable dictionary.

        Sets are exported as sorted lists, record objects through their own
        ``to_dict``, and timestamps as ISO-8601 strings (``None`` if unset).
        """
        stats = self.get_stats()
        logger.debug(f"Serializing ReconData with stats: {stats}")

        result = {
            'hostnames': sorted(self.hostnames),
            'ip_addresses': sorted(self.ip_addresses),
            'dns_records': {
                host: [record.to_dict() for record in records]
                for host, records in self.dns_records.items()
            },
            'reverse_dns': dict(self.reverse_dns),
            'certificates': {
                host: [cert.to_dict() for cert in certs]
                for host, certs in self.certificates.items()
            },
            'shodan_results': {
                ip: shodan.to_dict() for ip, shodan in self.shodan_results.items()
            },
            'virustotal_results': {
                resource: vt.to_dict() for resource, vt in self.virustotal_results.items()
            },
            'depth_map': dict(self.depth_map),
            'metadata': {
                'start_time': self.start_time.isoformat() if self.start_time else None,
                'end_time': self.end_time.isoformat() if self.end_time else None,
                'stats': stats,
            },
        }

        logger.info(f"Serialized data contains: {len(result['hostnames'])} hostnames, "
                    f"{len(result['ip_addresses'])} IPs, {len(result['shodan_results'])} Shodan results, "
                    f"{len(result['virustotal_results'])} VirusTotal results")

        return result

    def to_json(self) -> str:
        """Export data as JSON.

        Serialization is best-effort: if building or dumping the dictionary
        fails, the error is logged with its traceback and a minimal JSON
        document with ``error``, ``stats`` and ``timestamp`` keys is
        returned instead of raising.
        """
        try:
            return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)
        except Exception as e:
            # Deliberately broad: callers always get a JSON string back.
            logger.exception(f"Failed to serialize to JSON: {e}")
            # Return minimal JSON in case of error
            return json.dumps({
                'error': str(e),
                'stats': self.get_stats(),
                'timestamp': datetime.now().isoformat(),
            }, indent=2, ensure_ascii=False)