diff --git a/core/scanner.py b/core/scanner.py index 01564fa..4cff43c 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -16,7 +16,6 @@ from providers.crtsh_provider import CrtShProvider from providers.dns_provider import DNSProvider from providers.shodan_provider import ShodanProvider from providers.virustotal_provider import VirusTotalProvider -from config import config class ScanStatus: @@ -202,7 +201,7 @@ class Scanner: # Start scan in separate thread print(f"Starting scan thread for scanner {id(self)}...") self.scan_thread = threading.Thread( - target=self._execute_scan_async, + target=self._execute_scan, args=(self.current_target, max_depth), daemon=True ) @@ -216,15 +215,15 @@ class Scanner: traceback.print_exc() return False - async def _execute_scan_async(self, target_domain: str, max_depth: int) -> None: + def _execute_scan(self, target_domain: str, max_depth: int) -> None: """ - Execute the reconnaissance scan asynchronously with concurrent provider queries. + Execute the reconnaissance scan with concurrent provider queries. Args: target_domain: Target domain to investigate max_depth: Maximum recursion depth """ - print(f"_execute_scan_async started for {target_domain} with depth {max_depth}") + print(f"_execute_scan started for {target_domain} with depth {max_depth}") self.executor = ThreadPoolExecutor(max_workers=self.max_workers) try: @@ -275,14 +274,21 @@ class Scanner: for discovered_domain in discovered_domains: if discovered_domain not in processed_domains: next_level_domains.add(discovered_domain) - print(f"Adding {discovered_domain} to next level") + print(f"Adding {discovered_domain} to next level from domain query") if self.stop_event.is_set(): break if all_discovered_ips: print(f"Processing {len(all_discovered_ips)} discovered IP addresses") - self._process_ips_concurrent(all_discovered_ips) + # MODIFICATION START: Capture new domains from IP processing + new_domains_from_ips = self._process_ips_concurrent(all_discovered_ips) + if depth < max_depth: + for new_domain in new_domains_from_ips: + if new_domain not in processed_domains: + next_level_domains.add(new_domain) + print(f"Adding {new_domain} to next level from IP query") + # MODIFICATION END current_level_domains = next_level_domains print(f"Completed depth {depth}, {len(next_level_domains)} domains for next level") @@ -341,12 +347,14 @@ class Scanner: print(f"Error processing domain {domain}: {e}") return results - def _process_ips_concurrent(self, ips: Set[str]) -> None: + def _process_ips_concurrent(self, ips: Set[str]) -> Set[str]: # MODIFICATION: Changed return type """ Process multiple IP addresses concurrently. """ + all_discovered_domains = set() # NEW: Set to aggregate all results if not ips or self.stop_event.is_set(): - return + return all_discovered_domains # MODIFICATION: Return the new set + print(f"Processing {len(ips)} IP addresses concurrently") future_to_ip = { self.executor.submit(self._query_providers_for_ip, ip): ip @@ -358,10 +366,15 @@ class Scanner: continue ip = future_to_ip[future] try: - future.result() # Just wait for completion - print(f"Completed processing IP: {ip}") + # NEW: Get the set of domains from the future's result and update our aggregate set + discovered_domains_from_ip = future.result() + all_discovered_domains.update(discovered_domains_from_ip) + print(f"Completed processing IP: {ip}, found {len(discovered_domains_from_ip)} new domains.") except (Exception, CancelledError) as e: print(f"Error processing IP {ip}: {e}") + + return all_discovered_domains # MODIFICATION: Return the final aggregated set + def _query_providers_for_domain(self, domain: str) -> Tuple[Set[str], Set[str]]: """ @@ -373,20 +386,10 @@ class Scanner: discovered_ips = set() all_relationships = [] - # Comprehensive metadata collection for this domain - domain_metadata = { - 'dns_records': [], - 'related_domains_san': [], - 'shodan': {}, - 'virustotal': {}, - 'certificate_data': {}, - 'passive_dns': [], - } - if not self.providers or self.stop_event.is_set(): return discovered_domains, discovered_ips - # Query all providers concurrently + # Step 1: Query all providers and gather all relationships with ThreadPoolExecutor(max_workers=len(self.providers)) as provider_executor: future_to_provider = { provider_executor.submit(self._safe_provider_query_domain, provider, domain): provider @@ -397,89 +400,88 @@ class Scanner: if self.stop_event.is_set(): future.cancel() continue - + provider = future_to_provider[future] try: relationships = future.result() - print(f"Provider {provider.get_name()} returned {len(relationships)} relationships") - - # Process relationships and collect metadata + print(f"Provider {provider.get_name()} returned {len(relationships)} relationships for {domain}") for rel in relationships: source, target, rel_type, confidence, raw_data = rel - - # Add provider info to the relationship enhanced_rel = (source, target, rel_type, confidence, raw_data, provider.get_name()) all_relationships.append(enhanced_rel) - - # Collect metadata based on provider and relationship type - self._collect_node_metadata(domain, provider.get_name(), rel_type, target, raw_data, domain_metadata) - except (Exception, CancelledError) as e: print(f"Provider {provider.get_name()} failed for {domain}: {e}") - # Add the domain node with comprehensive metadata - self.graph.add_node(domain, NodeType.DOMAIN, metadata=domain_metadata) + # NEW Step 2: Group all targets by type and identify large entities + discovered_targets_by_type = defaultdict(set) + for _, target, _, _, _, _ in all_relationships: + if _is_valid_domain(target): + discovered_targets_by_type[NodeType.DOMAIN].add(target) + elif _is_valid_ip(target): + discovered_targets_by_type[NodeType.IP].add(target) - # Group relationships by type for large entity handling - relationships_by_type = defaultdict(list) - for source, target, rel_type, confidence, raw_data, provider_name in all_relationships: - relationships_by_type[rel_type].append((source, target, rel_type, confidence, raw_data, provider_name)) + targets_to_skip_recursion = set() + for node_type, targets in discovered_targets_by_type.items(): + if len(targets) > self.config.large_entity_threshold: + print(f"Large number of {node_type.value}s ({len(targets)}) found for {domain}. Creating a large entity node.") + # We can use the first relationship's type and provider for the large entity node + first_rel = next((r for r in all_relationships if r[1] in targets), None) + if first_rel: + self._handle_large_entity(domain, list(targets), first_rel[2], first_rel[5]) + targets_to_skip_recursion.update(targets) - # Handle large entities (only for SAN certificates currently) - for rel_type, relationships in relationships_by_type.items(): - if len(relationships) > config.large_entity_threshold and rel_type == RelationshipType.SAN_CERTIFICATE: - first_provider = relationships[0][5] if relationships else "multiple_providers" - self._handle_large_entity(domain, relationships, rel_type, first_provider) - # Remove these relationships from further processing - all_relationships = [rel for rel in all_relationships if not (rel[2] == rel_type and len(relationships_by_type[rel_type]) > config.large_entity_threshold)] - # Track DNS records to create (avoid duplicates) + # Step 3: Process all relationships to create/update nodes and edges + domain_metadata = defaultdict(lambda: defaultdict(list)) dns_records_to_create = {} - # Process remaining relationships for source, target, rel_type, confidence, raw_data, provider_name in all_relationships: if self.stop_event.is_set(): break - # Determine how to handle the target based on relationship type and content + # Special handling for crt.sh to distribute certificate metadata + if provider_name == 'crtsh' and 'domain_certificates' in raw_data: + domain_certs = raw_data.get('domain_certificates', {}) + for cert_domain, cert_summary in domain_certs.items(): + if _is_valid_domain(cert_domain): + # Create the node with its metadata. If node exists, metadata is updated. + self.graph.add_node(cert_domain, NodeType.DOMAIN, metadata={'certificate_data': cert_summary}) + + # General metadata collection for the source domain + self._collect_node_metadata(source, provider_name, rel_type, target, raw_data, domain_metadata[source]) + + # Add nodes and edges to the graph + if target in targets_to_skip_recursion: + continue + if _is_valid_ip(target): - # Create IP node and relationship self.graph.add_node(target, NodeType.IP) - if self.graph.add_edge(source, target, rel_type, confidence, provider_name, raw_data): print(f"Added IP relationship: {source} -> {target} ({rel_type.relationship_name})") - - # Add to recursion if it's a direct resolution if rel_type in [RelationshipType.A_RECORD, RelationshipType.AAAA_RECORD]: discovered_ips.add(target) - + elif target.startswith('AS') and target[2:].isdigit(): - # Create ASN node and relationship self.graph.add_node(target, NodeType.ASN) - if self.graph.add_edge(source, target, rel_type, confidence, provider_name, raw_data): print(f"Added ASN relationship: {source} -> {target} ({rel_type.relationship_name})") - + elif _is_valid_domain(target): - # Create domain node and relationship + # Ensure the target node exists before adding an edge self.graph.add_node(target, NodeType.DOMAIN) - if self.graph.add_edge(source, target, rel_type, confidence, provider_name, raw_data): print(f"Added domain relationship: {source} -> {target} ({rel_type.relationship_name})") - # Add to recursion for specific relationship types recurse_types = [ - RelationshipType.CNAME_RECORD, - RelationshipType.MX_RECORD, - RelationshipType.SAN_CERTIFICATE, - RelationshipType.NS_RECORD, + RelationshipType.CNAME_RECORD, RelationshipType.MX_RECORD, + RelationshipType.SAN_CERTIFICATE, RelationshipType.NS_RECORD, RelationshipType.PASSIVE_DNS ] if rel_type in recurse_types: discovered_domains.add(target) - + else: - # Handle DNS record content (TXT, SPF, CAA, etc.) + # Handle DNS record content dns_record_types = [ RelationshipType.TXT_RECORD, RelationshipType.SPF_RECORD, RelationshipType.CAA_RECORD, RelationshipType.SRV_RECORD, @@ -487,59 +489,38 @@ class Scanner: RelationshipType.RRSIG_RECORD, RelationshipType.SSHFP_RECORD, RelationshipType.TLSA_RECORD, RelationshipType.NAPTR_RECORD ] - if rel_type in dns_record_types: - # Create normalized DNS record identifier record_type = rel_type.relationship_name.upper().replace('_RECORD', '') record_content = target.strip() - - # Create a unique identifier for this DNS record content_hash = hash(record_content) & 0x7FFFFFFF dns_record_id = f"{record_type}:{content_hash}" - # Track this DNS record for creation (avoid duplicates) if dns_record_id not in dns_records_to_create: dns_records_to_create[dns_record_id] = { - 'content': record_content, - 'type': record_type, - 'domains': set(), - 'raw_data': raw_data, - 'provider_name': provider_name, - 'confidence': confidence + 'content': record_content, 'type': record_type, 'domains': set(), + 'raw_data': raw_data, 'provider_name': provider_name, 'confidence': confidence } - - # Add this domain to the DNS record's domain list dns_records_to_create[dns_record_id]['domains'].add(source) - - print(f"DNS record tracked: {source} -> {record_type} (content length: {len(record_content)})") - else: - # For other non-infrastructure targets, log but don't create nodes - print(f"Non-infrastructure relationship stored as metadata: {source} - {rel_type.relationship_name}: {target[:100]}") + + # Step 4: Update the source domain node with its collected metadata + if domain in domain_metadata: + self.graph.add_node(domain, NodeType.DOMAIN, metadata=dict(domain_metadata[domain])) - # Create DNS record nodes and their relationships + # Step 5: Create DNS record nodes and edges for dns_record_id, record_info in dns_records_to_create.items(): - if self.stop_event.is_set(): - break - record_metadata = { - 'record_type': record_info['type'], - 'content': record_info['content'], + 'record_type': record_info['type'], 'content': record_info['content'], 'content_hash': dns_record_id.split(':')[1], 'associated_domains': list(record_info['domains']), 'source_data': record_info['raw_data'] } - - # Create the DNS record node self.graph.add_node(dns_record_id, NodeType.DNS_RECORD, metadata=record_metadata) - - # Connect each domain to this DNS record for domain_name in record_info['domains']: - if self.graph.add_edge(domain_name, dns_record_id, RelationshipType.DNS_RECORD, - record_info['confidence'], record_info['provider_name'], - record_info['raw_data']): - print(f"Added DNS record relationship: {domain_name} -> {dns_record_id}") + self.graph.add_edge(domain_name, dns_record_id, RelationshipType.DNS_RECORD, + record_info['confidence'], record_info['provider_name'], + record_info['raw_data']) - print(f"Domain {domain}: discovered {len(discovered_domains)} domains, {len(discovered_ips)} IPs, {len(dns_records_to_create)} DNS records") + print(f"Domain {domain}: discovered {len(discovered_domains)} domains, {len(discovered_ips)} IPs") return discovered_domains, discovered_ips def _collect_node_metadata(self, node_id: str, provider_name: str, rel_type: RelationshipType, @@ -611,13 +592,15 @@ class Scanner: self.graph.add_node(entity_name, NodeType.LARGE_ENTITY, metadata={"count": len(relationships)}) self.graph.add_edge(source_domain, entity_name, rel_type, 0.9, provider_name, {"info": "Aggregated node"}) - def _query_providers_for_ip(self, ip: str) -> None: + def _query_providers_for_ip(self, ip: str) -> Set[str]: """ Query all enabled providers for information about an IP address and collect comprehensive metadata. """ print(f"Querying {len(self.providers)} providers for IP: {ip}") + discovered_hostnames = set() + if not self.providers or self.stop_event.is_set(): - return + return discovered_hostnames # Comprehensive metadata collection for this IP ip_metadata = { @@ -646,35 +629,51 @@ class Scanner: print(f"Provider {provider.get_name()} returned {len(relationships)} relationships for IP {ip}") for source, target, rel_type, confidence, raw_data in relationships: - # Add provider info to the relationship enhanced_rel = (source, target, rel_type, confidence, raw_data, provider.get_name()) all_relationships.append(enhanced_rel) - - # Collect metadata for the IP self._collect_ip_metadata(ip, provider.get_name(), rel_type, target, raw_data, ip_metadata) except (Exception, CancelledError) as e: print(f"Provider {provider.get_name()} failed for IP {ip}: {e}") + # NEW: Group all targets by type and identify large entities + discovered_targets_by_type = defaultdict(set) + for _, target, _, _, _, _ in all_relationships: + if _is_valid_domain(target): + discovered_targets_by_type[NodeType.DOMAIN].add(target) + + # THE FIX IS HERE: Initialize the set before using it. + targets_to_skip = set() + for node_type, targets in discovered_targets_by_type.items(): + if len(targets) > self.config.large_entity_threshold: + print(f"Large number of {node_type.value}s ({len(targets)}) found for IP {ip}. Creating a large entity node.") + first_rel = next((r for r in all_relationships if r[1] in targets), None) + if first_rel: + self._handle_large_entity(ip, list(targets), first_rel[2], first_rel[5]) + targets_to_skip.update(targets) + # Update the IP node with comprehensive metadata self.graph.add_node(ip, NodeType.IP, metadata=ip_metadata) # Process relationships with correct provider attribution for source, target, rel_type, confidence, raw_data, provider_name in all_relationships: - # Determine target node type + if target in targets_to_skip: + continue + if _is_valid_domain(target): target_node_type = NodeType.DOMAIN + discovered_hostnames.add(target) elif target.startswith('AS'): target_node_type = NodeType.ASN else: target_node_type = NodeType.IP - # Create/update target node self.graph.add_node(target, target_node_type) - # Add relationship with correct provider attribution if self.graph.add_edge(source, target, rel_type, confidence, provider_name, raw_data): print(f"Added IP relationship: {source} -> {target} ({rel_type.relationship_name}) from {provider_name}") + + return discovered_hostnames def _collect_ip_metadata(self, ip: str, provider_name: str, rel_type: RelationshipType, target: str, raw_data: Dict[str, Any], metadata: Dict[str, Any]) -> None: diff --git a/static/css/main.css b/static/css/main.css index c0ba131..5accf53 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -642,6 +642,20 @@ input[type="text"]:focus, select:focus { word-break: break-word; } +.detail-section-header { + font-size: 1rem; + color: #00ff41; + font-weight: 500; + margin-top: 1.5rem; + margin-bottom: 0.75rem; + padding-bottom: 0.5rem; + border-bottom: 1px solid #444; +} + +.detail-section-header:first-of-type { + margin-top: 0; +} + .copy-btn { background: none; border: none; diff --git a/static/js/graph.js b/static/js/graph.js index 9d6af11..18153cd 100644 --- a/static/js/graph.js +++ b/static/js/graph.js @@ -359,10 +359,10 @@ class GraphManager { // Style based on certificate validity if (node.type === 'domain') { - if (node.metadata && node.metadata.has_valid_cert === true) { + if (node.metadata && node.metadata.certificate_data && node.metadata.certificate_data.has_valid_cert === true) { processedNode.color = '#00ff41'; // Bright green for valid cert processedNode.borderColor = '#00aa2e'; - } else if (node.metadata && node.metadata.has_valid_cert === false) { + } else if (node.metadata && node.metadata.certificate_data && node.metadata.certificate_data.has_valid_cert === false) { processedNode.color = '#888888'; // Muted grey color processedNode.borderColor = '#666666'; // Darker grey border } diff --git a/static/js/main.js b/static/js/main.js index 01381bc..2a54782 100644 --- a/static/js/main.js +++ b/static/js/main.js @@ -742,8 +742,7 @@ class DNSReconApp { const createDetailRow = (label, value, statusIcon = '') => { const baseId = `detail-${label.replace(/[^a-zA-Z0-9]/g, '-')}`; - // Handle empty or undefined values - if (value === null || value === undefined || + if (value === null || value === undefined || (Array.isArray(value) && value.length === 0) || (typeof value === 'object' && Object.keys(value).length === 0)) { return ` @@ -754,7 +753,6 @@ class DNSReconApp { `; } - // Handle arrays if (Array.isArray(value)) { return value.map((item, index) => { const itemId = `${baseId}-${index}`; @@ -767,9 +765,7 @@ class DNSReconApp { `; }).join(''); - } - // Handle objects and primitives - else { + } else { const valueId = `${baseId}-0`; const icon = statusIcon || '
'; return ` @@ -783,44 +779,53 @@ class DNSReconApp { }; const metadata = node.metadata || {}; + + // General Node Info + detailsHtml += createDetailRow('Node Type', node.type); // Display data based on node type switch (node.type) { case 'domain': detailsHtml += createDetailRow('DNS Records', metadata.dns_records); detailsHtml += createDetailRow('Related Domains (SAN)', metadata.related_domains_san); - detailsHtml += createDetailRow('Certificate Data', metadata.certificate_data); detailsHtml += createDetailRow('Passive DNS', metadata.passive_dns); detailsHtml += createDetailRow('Shodan Data', metadata.shodan); detailsHtml += createDetailRow('VirusTotal Data', metadata.virustotal); - detailsHtml += createDetailRow('ASN Information', metadata.asn_data); break; case 'ip': - detailsHtml += createDetailRow('DNS Records', metadata.dns_records); + detailsHtml += createDetailRow('Hostnames', metadata.hostnames); detailsHtml += createDetailRow('Passive DNS', metadata.passive_dns); detailsHtml += createDetailRow('Shodan Data', metadata.shodan); detailsHtml += createDetailRow('VirusTotal Data', metadata.virustotal); - detailsHtml += createDetailRow('ASN Information', metadata.asn_data); - break; - case 'asn': - detailsHtml += createDetailRow('ASN Information', metadata.asn_data); - detailsHtml += createDetailRow('Related IPs', metadata.passive_dns); - break; - case 'large_entity': - detailsHtml += createDetailRow('Entity Type', metadata.entity_type || 'Large Collection'); - detailsHtml += createDetailRow('Item Count', metadata.count); - detailsHtml += createDetailRow('Discovered Domains', metadata.domains); break; } + + // Special handling for certificate data + if (metadata.certificate_data && Object.keys(metadata.certificate_data).length > 0) { + const cert = metadata.certificate_data; + detailsHtml += `