Compare commits


2 Commits

Author          SHA1        Message  Date
overcuriousity  646b569ced  it       2025-09-11 21:38:04 +02:00
overcuriousity  b47e679992  it       2025-09-11 20:37:43 +02:00
6 changed files with 298 additions and 413 deletions

app.py (19 changed lines)
View File

@@ -90,6 +90,7 @@ def start_scan():
target_domain = data['target_domain'].strip()
max_depth = data.get('max_depth', config.default_recursion_depth)
clear_graph = data.get('clear_graph', True)
print(f"Parsed - target_domain: '{target_domain}', max_depth: {max_depth}")
@@ -114,27 +115,11 @@ def start_scan():
user_session_id, scanner = get_user_scanner()
print(f"Using session: {user_session_id}")
print(f"Scanner object ID: {id(scanner)}")
print(f"Scanner status before start: {scanner.status}")
# Additional safety check - if scanner is somehow in running state, force reset
if scanner.status == 'running':
print(f"WARNING: Scanner in session {user_session_id} was already running - forcing reset")
scanner.stop_scan()
# Give it a moment to stop
import time
time.sleep(1)
# If still running, force status reset
if scanner.status == 'running':
print("WARNING: Force resetting scanner status from 'running' to 'idle'")
scanner.status = 'idle'
# Start scan
print(f"Calling start_scan on scanner {id(scanner)}...")
success = scanner.start_scan(target_domain, max_depth)
success = scanner.start_scan(target_domain, max_depth, clear_graph=clear_graph)
print(f"scanner.start_scan returned: {success}")
print(f"Scanner status after start attempt: {scanner.status}")
if success:
scan_session_id = scanner.logger.session_id
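get_user_scanner() is referenced but not shown in this diff; conceptually it gives each browser session its own Scanner so concurrent users do not share scan state. A minimal sketch under that assumption:

    import uuid
    from flask import session

    _scanners = {}  # session id -> Scanner; a plain dict registry is a simplification

    def get_user_scanner():
        user_session_id = session.setdefault('user_session_id', str(uuid.uuid4()))
        if user_session_id not in _scanners:
            _scanners[user_session_id] = Scanner()  # assumed no-arg constructor
        return user_session_id, _scanners[user_session_id]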

View File

@@ -143,49 +143,40 @@ class Scanner:
print("Session configuration updated")
def start_scan(self, target_domain: str, max_depth: int = 2) -> bool:
def start_scan(self, target_domain: str, max_depth: int = 2, clear_graph: bool = True) -> bool:
"""
Start a new reconnaissance scan with concurrent processing.
Enhanced with better debugging and state validation.
Args:
target_domain: Initial domain to investigate
max_depth: Maximum recursion depth
Returns:
bool: True if scan started successfully
Start a new reconnaissance scan.
Forcefully cleans up any previous scan thread before starting.
"""
print(f"=== STARTING SCAN IN SCANNER {id(self)} ===")
print(f"Scanner status: {self.status}")
print(f"Target domain: '{target_domain}', Max depth: {max_depth}")
print(f"Available providers: {len(self.providers) if hasattr(self, 'providers') else 0}")
try:
if self.status == ScanStatus.RUNNING:
print(f"ERROR: Scan already running in scanner {id(self)}, rejecting new scan")
print(f"Current target: {self.current_target}")
print(f"Current depth: {self.current_depth}")
return False
print(f"Initial scanner status: {self.status}")
# If a thread is still alive from a previous scan, we must wait for it to die.
if self.scan_thread and self.scan_thread.is_alive():
print("A previous scan thread is still alive. Sending termination signal and waiting...")
self.stop_scan()
self.scan_thread.join(10.0) # Wait up to 10 seconds
if self.scan_thread.is_alive():
print("ERROR: The previous scan thread is unresponsive and could not be stopped. Please restart the application.")
self.status = ScanStatus.FAILED
return False
print("Previous scan thread terminated successfully.")
# Reset state for the new scan
self.status = ScanStatus.IDLE
print(f"Scanner state is now clean for a new scan.")
try:
# Check if we have any providers
if not hasattr(self, 'providers') or not self.providers:
print(f"ERROR: No providers available in scanner {id(self)}, cannot start scan")
return False
print(f"Scanner {id(self)} validation passed, providers available: {[p.get_name() for p in self.providers]}")
# Stop any existing scan thread
if self.scan_thread and self.scan_thread.is_alive():
print(f"Stopping existing scan thread in scanner {id(self)}...")
self.stop_event.set()
self.scan_thread.join(timeout=5.0)
if self.scan_thread.is_alive():
print(f"WARNING: Could not stop existing thread in scanner {id(self)}")
return False
# Reset state
print(f"Resetting scanner {id(self)} state...")
self.graph.clear()
if clear_graph:
self.graph.clear()
self.current_target = target_domain.lower().strip()
self.max_depth = max_depth
self.current_depth = 0
@@ -213,6 +204,7 @@ class Scanner:
except Exception as e:
print(f"ERROR: Exception in start_scan for scanner {id(self)}: {e}")
traceback.print_exc()
self.status = ScanStatus.FAILED
return False
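With the widened signature, a second scan can merge into the existing graph by passing clear_graph=False. A usage sketch (construction and thread handling are assumptions based on the attributes used above):

    scanner = Scanner()  # assumed no-arg constructor; the app builds one per session
    if scanner.start_scan("example.com", max_depth=2):      # default: graph is cleared
        scanner.scan_thread.join()                          # wait for the first scan
    # Second target merged into the same graph:
    scanner.start_scan("example.org", max_depth=1, clear_graph=False)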
def _execute_scan(self, target_domain: str, max_depth: int) -> None:
@@ -240,9 +232,8 @@
self.graph.add_node(target_domain, NodeType.DOMAIN)
# BFS-style exploration
current_level_domains = {target_domain}
processed_domains = set()
all_discovered_ips = set()
current_level_targets = {target_domain}
processed_targets = set()
print("Starting BFS exploration...")
@@ -252,46 +243,26 @@
break
self.current_depth = depth
print(f"Processing depth level {depth} with {len(current_level_domains)} domains")
print(f"Processing depth level {depth} with {len(current_level_targets)} targets")
if not current_level_domains:
print("No domains to process at this level")
if not current_level_targets:
print("No targets to process at this level")
break
self.total_indicators_found += len(current_level_domains)
next_level_domains = set()
domain_results = self._process_domains_concurrent(current_level_domains, processed_domains)
for domain, discovered_domains, discovered_ips in domain_results:
if self.stop_event.is_set():
break
processed_domains.add(domain)
all_discovered_ips.update(discovered_ips)
self.total_indicators_found += len(current_level_targets)
target_results = self._process_targets_concurrent(current_level_targets, processed_targets)
next_level_targets = set()
for target, new_targets in target_results:
processed_targets.add(target)
if depth < max_depth:
for discovered_domain in discovered_domains:
if discovered_domain not in processed_domains:
next_level_domains.add(discovered_domain)
print(f"Adding {discovered_domain} to next level from domain query")
if self.stop_event.is_set():
break
if all_discovered_ips:
print(f"Processing {len(all_discovered_ips)} discovered IP addresses")
# MODIFICATION START: Capture new domains from IP processing
new_domains_from_ips = self._process_ips_concurrent(all_discovered_ips)
if depth < max_depth:
for new_domain in new_domains_from_ips:
if new_domain not in processed_domains:
next_level_domains.add(new_domain)
print(f"Adding {new_domain} to next level from IP query")
# MODIFICATION END
current_level_domains = next_level_domains
print(f"Completed depth {depth}, {len(next_level_domains)} domains for next level")
for new_target in new_targets:
if new_target not in processed_targets:
next_level_targets.add(new_target)
current_level_targets = next_level_targets
print(f"Completed depth {depth}, {len(next_level_targets)} targets for next level")
except Exception as e:
print(f"ERROR: Scan execution failed with error: {e}")
@@ -313,86 +284,57 @@ class Scanner:
print("Final scan statistics:")
print(f" - Total nodes: {stats['basic_metrics']['total_nodes']}")
print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
print(f" - Domains processed: {len(processed_domains)}")
print(f" - IPs discovered: {len(all_discovered_ips)}")
print(f" - Targets processed: {len(processed_targets)}")
def _process_domains_concurrent(self, domains: Set[str], processed_domains: Set[str]) -> List[Tuple[str, Set[str], Set[str]]]:
"""
Process multiple domains concurrently using thread pool.
"""
def _process_targets_concurrent(self, targets: Set[str], processed_targets: Set[str]) -> List[Tuple[str, Set[str]]]:
"""Process multiple targets (domains or IPs) concurrently using a thread pool."""
results = []
domains_to_process = domains - processed_domains
if not domains_to_process:
targets_to_process = targets - processed_targets
if not targets_to_process:
return results
print(f"Processing {len(domains_to_process)} domains concurrently with {self.max_workers} workers")
print(f"Processing {len(targets_to_process)} targets concurrently with {self.max_workers} workers")
future_to_domain = {
self.executor.submit(self._query_providers_for_domain, domain): domain
for domain in domains_to_process
future_to_target = {
self.executor.submit(self._query_providers_for_target, target): target
for target in targets_to_process
}
for future in as_completed(future_to_domain):
for future in as_completed(future_to_target):
if self.stop_event.is_set():
future.cancel()
continue
domain = future_to_domain[future]
target = future_to_target[future]
try:
discovered_domains, discovered_ips = future.result()
results.append((domain, discovered_domains, discovered_ips))
new_targets = future.result()
results.append((target, new_targets))
self.indicators_processed += 1
print(f"Completed processing domain: {domain} ({len(discovered_domains)} domains, {len(discovered_ips)} IPs)")
print(f"Completed processing target: {target} (found {len(new_targets)} new targets)")
except (Exception, CancelledError) as e:
print(f"Error processing domain {domain}: {e}")
print(f"Error processing target {target}: {e}")
return results
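A caveat on the future.cancel() call above: ThreadPoolExecutor can only cancel tasks that have not started, so in-flight provider queries end only when they notice stop_event. A sketch of that cooperative pattern (do_query is a hypothetical stand-in for a provider call):

    import threading
    from concurrent.futures import ThreadPoolExecutor, as_completed

    stop_event = threading.Event()

    def worker(target):
        if stop_event.is_set():          # running tasks must check the flag themselves
            return set()
        return do_query(target)          # hypothetical provider call

    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = {pool.submit(worker, t): t for t in ("a.example", "b.example")}
        for future in as_completed(futures):
            if stop_event.is_set():
                future.cancel()          # only stops tasks that have not started yet
                continue
            result = future.result()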
def _process_ips_concurrent(self, ips: Set[str]) -> Set[str]: # MODIFICATION: Changed return type
def _query_providers_for_target(self, target: str) -> Set[str]:
"""
Process multiple IP addresses concurrently.
"""
all_discovered_domains = set() # NEW: Set to aggregate all results
if not ips or self.stop_event.is_set():
return all_discovered_domains # MODIFICATION: Return the new set
print(f"Processing {len(ips)} IP addresses concurrently")
future_to_ip = {
self.executor.submit(self._query_providers_for_ip, ip): ip
for ip in ips
}
for future in as_completed(future_to_ip):
if self.stop_event.is_set():
future.cancel()
continue
ip = future_to_ip[future]
try:
# NEW: Get the set of domains from the future's result and update our aggregate set
discovered_domains_from_ip = future.result()
all_discovered_domains.update(discovered_domains_from_ip)
print(f"Completed processing IP: {ip}, found {len(discovered_domains_from_ip)} new domains.")
except (Exception, CancelledError) as e:
print(f"Error processing IP {ip}: {e}")
return all_discovered_domains # MODIFICATION: Return the final aggregated set
def _query_providers_for_domain(self, domain: str) -> Tuple[Set[str], Set[str]]:
"""
Query all enabled providers for information about a domain and collect comprehensive metadata.
Query all enabled providers for information about a target (domain or IP) and collect comprehensive metadata.
Creates appropriate node types and relationships based on discovered data.
"""
print(f"Querying {len(self.providers)} providers for domain: {domain}")
discovered_domains = set()
discovered_ips = set()
is_ip = _is_valid_ip(target)
target_type = NodeType.IP if is_ip else NodeType.DOMAIN
print(f"Querying {len(self.providers)} providers for {target_type.value}: {target}")
new_targets = set()
all_relationships = []
if not self.providers or self.stop_event.is_set():
return discovered_domains, discovered_ips
return new_targets
# Step 1: Query all providers and gather all relationships
with ThreadPoolExecutor(max_workers=len(self.providers)) as provider_executor:
future_to_provider = {
provider_executor.submit(self._safe_provider_query_domain, provider, domain): provider
provider_executor.submit(
self._safe_provider_query, provider, target, is_ip
): provider
for provider in self.providers
}
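_is_valid_ip and _is_valid_domain are used throughout but never shown in this diff. Plausible implementations, offered purely as a sketch:

    import ipaddress
    import re

    def _is_valid_ip(value: str) -> bool:
        try:
            ipaddress.ip_address(value)
            return True
        except ValueError:
            return False

    # Requires at least two labels, each 1-63 chars, no leading/trailing hyphen.
    _DOMAIN_RE = re.compile(r'^(?!-)[A-Za-z0-9-]{1,63}(?<!-)(\.(?!-)[A-Za-z0-9-]{1,63}(?<!-))+$')

    def _is_valid_domain(value: str) -> bool:
        return bool(_DOMAIN_RE.match(value))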
@@ -404,38 +346,36 @@ class Scanner:
provider = future_to_provider[future]
try:
relationships = future.result()
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships for {domain}")
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships for {target}")
for rel in relationships:
source, target, rel_type, confidence, raw_data = rel
enhanced_rel = (source, target, rel_type, confidence, raw_data, provider.get_name())
source, rel_target, rel_type, confidence, raw_data = rel
enhanced_rel = (source, rel_target, rel_type, confidence, raw_data, provider.get_name())
all_relationships.append(enhanced_rel)
except (Exception, CancelledError) as e:
print(f"Provider {provider.get_name()} failed for {domain}: {e}")
print(f"Provider {provider.get_name()} failed for {target}: {e}")
# NEW Step 2: Group all targets by type and identify large entities
discovered_targets_by_type = defaultdict(set)
for _, target, _, _, _, _ in all_relationships:
if _is_valid_domain(target):
discovered_targets_by_type[NodeType.DOMAIN].add(target)
elif _is_valid_ip(target):
discovered_targets_by_type[NodeType.IP].add(target)
for _, rel_target, _, _, _, _ in all_relationships:
if _is_valid_domain(rel_target):
discovered_targets_by_type[NodeType.DOMAIN].add(rel_target)
elif _is_valid_ip(rel_target):
discovered_targets_by_type[NodeType.IP].add(rel_target)
targets_to_skip = set()
for node_type, targets in discovered_targets_by_type.items():
if len(targets) > self.config.large_entity_threshold:
print(f"Large number of {node_type.value}s ({len(targets)}) found for {domain}. Creating a large entity node.")
# We can use the first relationship's type and provider for the large entity node
print(f"Large number of {node_type.value}s ({len(targets)}) found for {target}. Creating a large entity node.")
first_rel = next((r for r in all_relationships if r[1] in targets), None)
if first_rel:
self._handle_large_entity(domain, list(targets), first_rel[2], first_rel[5])
self._handle_large_entity(target, list(targets), first_rel[2], first_rel[5])
targets_to_skip.update(targets)
# Step 3: Process all relationships to create/update nodes and edges
domain_metadata = defaultdict(lambda: defaultdict(list))
target_metadata = defaultdict(lambda: defaultdict(list))
dns_records_to_create = {}
for source, target, rel_type, confidence, raw_data, provider_name in all_relationships:
for source, rel_target, rel_type, confidence, raw_data, provider_name in all_relationships:
if self.stop_event.is_set():
break
@@ -444,33 +384,31 @@
domain_certs = raw_data.get('domain_certificates', {})
for cert_domain, cert_summary in domain_certs.items():
if _is_valid_domain(cert_domain) and cert_domain not in targets_to_skip:
# Create the node with its metadata. If node exists, metadata is updated.
self.graph.add_node(cert_domain, NodeType.DOMAIN, metadata={'certificate_data': cert_summary})
# General metadata collection for the source domain
self._collect_node_metadata(source, provider_name, rel_type, target, raw_data, domain_metadata[source])
# General metadata collection
self._collect_node_metadata(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source])
# Add nodes and edges to the graph
if target in targets_to_skip:
if rel_target in targets_to_skip:
continue
if _is_valid_ip(target):
self.graph.add_node(target, NodeType.IP)
if self.graph.add_edge(source, target, rel_type, confidence, provider_name, raw_data):
print(f"Added IP relationship: {source} -> {target} ({rel_type.relationship_name})")
if _is_valid_ip(rel_target):
self.graph.add_node(rel_target, NodeType.IP)
if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
print(f"Added IP relationship: {source} -> {rel_target} ({rel_type.relationship_name})")
if rel_type in [RelationshipType.A_RECORD, RelationshipType.AAAA_RECORD]:
discovered_ips.add(target)
new_targets.add(rel_target)
elif target.startswith('AS') and target[2:].isdigit():
self.graph.add_node(target, NodeType.ASN)
if self.graph.add_edge(source, target, rel_type, confidence, provider_name, raw_data):
print(f"Added ASN relationship: {source} -> {target} ({rel_type.relationship_name})")
elif rel_target.startswith('AS') and rel_target[2:].isdigit():
self.graph.add_node(rel_target, NodeType.ASN)
if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
print(f"Added ASN relationship: {source} -> {rel_target} ({rel_type.relationship_name})")
elif _is_valid_domain(target):
# Ensure the target node exists before adding an edge
self.graph.add_node(target, NodeType.DOMAIN)
if self.graph.add_edge(source, target, rel_type, confidence, provider_name, raw_data):
print(f"Added domain relationship: {source} -> {target} ({rel_type.relationship_name})")
elif _is_valid_domain(rel_target):
self.graph.add_node(rel_target, NodeType.DOMAIN)
if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
print(f"Added domain relationship: {source} -> {rel_target} ({rel_type.relationship_name})")
recurse_types = [
RelationshipType.CNAME_RECORD, RelationshipType.MX_RECORD,
@@ -478,7 +416,7 @@
RelationshipType.PASSIVE_DNS
]
if rel_type in recurse_types:
discovered_domains.add(target)
new_targets.add(rel_target)
else:
# Handle DNS record content
@@ -491,7 +429,7 @@
]
if rel_type in dns_record_types:
record_type = rel_type.relationship_name.upper().replace('_RECORD', '')
record_content = target.strip()
record_content = rel_target.strip()
content_hash = hash(record_content) & 0x7FFFFFFF
dns_record_id = f"{record_type}:{content_hash}"
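One caveat with the ID scheme above: Python's built-in hash() on strings is salted per process (PYTHONHASHSEED), so the same record content yields a different dns_record_id on every run. If stable IDs matter, e.g. when appending to an existing graph with clear_graph=False, a digest is deterministic. A sketch:

    import hashlib

    def stable_dns_record_id(record_type: str, record_content: str) -> str:
        # sha256 is deterministic across processes, unlike the salted built-in hash().
        digest = hashlib.sha256(record_content.encode('utf-8')).hexdigest()[:8]
        return f"{record_type}:{digest}"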
@@ -502,9 +440,9 @@
}
dns_records_to_create[dns_record_id]['domains'].add(source)
# Step 4: Update the source domain node with its collected metadata
if domain in domain_metadata:
self.graph.add_node(domain, NodeType.DOMAIN, metadata=dict(domain_metadata[domain]))
# Step 4: Update the source node with its collected metadata
if target in target_metadata:
self.graph.add_node(target, target_type, metadata=dict(target_metadata[target]))
# Step 5: Create DNS record nodes and edges
for dns_record_id, record_info in dns_records_to_create.items():
@@ -519,9 +457,8 @@
self.graph.add_edge(domain_name, dns_record_id, RelationshipType.DNS_RECORD,
record_info['confidence'], record_info['provider_name'],
record_info['raw_data'])
print(f"Domain {domain}: discovered {len(discovered_domains)} domains, {len(discovered_ips)} IPs")
return discovered_domains, discovered_ips
return new_targets
def _collect_node_metadata(self, node_id: str, provider_name: str, rel_type: RelationshipType,
target: str, raw_data: Dict[str, Any], metadata: Dict[str, Any]) -> None:
@@ -532,189 +469,41 @@ class Scanner:
record_type = raw_data.get('query_type', 'UNKNOWN')
value = raw_data.get('value', target)
# For non-infrastructure DNS records, store the full content
if record_type in ['TXT', 'SPF', 'CAA']:
dns_entry = f"{record_type}: {value}"
else:
dns_entry = f"{record_type}: {value}"
if dns_entry not in metadata['dns_records']:
metadata['dns_records'].append(dns_entry)
if dns_entry not in metadata.get('dns_records', []):
metadata.setdefault('dns_records', []).append(dns_entry)
elif provider_name == 'crtsh':
if rel_type == RelationshipType.SAN_CERTIFICATE:
# Handle certificate data storage on domain nodes
domain_certs = raw_data.get('domain_certificates', {})
# Store certificate information for this domain
if node_id in domain_certs:
cert_summary = domain_certs[node_id]
# Update domain metadata with certificate information
metadata['certificate_data'] = cert_summary
metadata['has_valid_cert'] = cert_summary.get('has_valid_cert', False)
# Add related domains from shared certificates
if target not in metadata.get('related_domains_san', []):
if 'related_domains_san' not in metadata:
metadata['related_domains_san'] = []
metadata['related_domains_san'].append(target)
# Store shared certificate details for forensic analysis
metadata.setdefault('related_domains_san', []).append(target)
shared_certs = raw_data.get('shared_certificates', [])
if shared_certs and 'shared_certificate_details' not in metadata:
metadata['shared_certificate_details'] = shared_certs
elif provider_name == 'shodan':
# Merge Shodan data (avoid overwriting)
for key, value in raw_data.items():
if key not in metadata['shodan'] or not metadata['shodan'][key]:
metadata['shodan'][key] = value
if key not in metadata.get('shodan', {}) or not metadata.get('shodan', {}).get(key):
metadata.setdefault('shodan', {})[key] = value
elif provider_name == 'virustotal':
# Merge VirusTotal data
for key, value in raw_data.items():
if key not in metadata['virustotal'] or not metadata['virustotal'][key]:
metadata['virustotal'][key] = value
# Add passive DNS entries
if key not in metadata.get('virustotal', {}) or not metadata.get('virustotal', {}).get(key):
metadata.setdefault('virustotal', {})[key] = value
if rel_type == RelationshipType.PASSIVE_DNS:
passive_entry = f"Passive DNS: {target}"
if passive_entry not in metadata['passive_dns']:
metadata['passive_dns'].append(passive_entry)
def _handle_large_entity(self, source_domain: str, relationships: list, rel_type: RelationshipType, provider_name: str):
"""
Handles the creation of a large entity node when a threshold is exceeded.
"""
print(f"Large number of {rel_type.name} relationships for {source_domain}. Creating a large entity node.")
entity_name = f"Large collection of {rel_type.name} for {source_domain}"
self.graph.add_node(entity_name, NodeType.LARGE_ENTITY, metadata={"count": len(relationships)})
self.graph.add_edge(source_domain, entity_name, rel_type, 0.9, provider_name, {"info": "Aggregated node"})
def _query_providers_for_ip(self, ip: str) -> Set[str]:
"""
Query all enabled providers for information about an IP address and collect comprehensive metadata.
"""
print(f"Querying {len(self.providers)} providers for IP: {ip}")
discovered_hostnames = set()
if not self.providers or self.stop_event.is_set():
return discovered_hostnames
# Comprehensive metadata collection for this IP
ip_metadata = {
'dns_records': [],
'passive_dns': [],
'shodan': {},
'virustotal': {},
'asn_data': {},
'hostnames': [],
}
all_relationships = [] # Store relationships with provider info
with ThreadPoolExecutor(max_workers=len(self.providers)) as provider_executor:
future_to_provider = {
provider_executor.submit(self._safe_provider_query_ip, provider, ip): provider
for provider in self.providers
}
for future in as_completed(future_to_provider):
if self.stop_event.is_set():
future.cancel()
continue
provider = future_to_provider[future]
try:
relationships = future.result()
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships for IP {ip}")
for source, target, rel_type, confidence, raw_data in relationships:
enhanced_rel = (source, target, rel_type, confidence, raw_data, provider.get_name())
all_relationships.append(enhanced_rel)
self._collect_ip_metadata(ip, provider.get_name(), rel_type, target, raw_data, ip_metadata)
except (Exception, CancelledError) as e:
print(f"Provider {provider.get_name()} failed for IP {ip}: {e}")
# NEW: Group all targets by type and identify large entities
discovered_targets_by_type = defaultdict(set)
for _, target, _, _, _, _ in all_relationships:
if _is_valid_domain(target):
discovered_targets_by_type[NodeType.DOMAIN].add(target)
# THE FIX IS HERE: Initialize the set before using it.
targets_to_skip = set()
for node_type, targets in discovered_targets_by_type.items():
if len(targets) > self.config.large_entity_threshold:
print(f"Large number of {node_type.value}s ({len(targets)}) found for IP {ip}. Creating a large entity node.")
first_rel = next((r for r in all_relationships if r[1] in targets), None)
if first_rel:
self._handle_large_entity(ip, list(targets), first_rel[2], first_rel[5])
targets_to_skip.update(targets)
# Update the IP node with comprehensive metadata
self.graph.add_node(ip, NodeType.IP, metadata=ip_metadata)
# Process relationships with correct provider attribution
for source, target, rel_type, confidence, raw_data, provider_name in all_relationships:
if target in targets_to_skip:
continue
if _is_valid_domain(target):
target_node_type = NodeType.DOMAIN
discovered_hostnames.add(target)
elif target.startswith('AS'):
target_node_type = NodeType.ASN
else:
target_node_type = NodeType.IP
self.graph.add_node(target, target_node_type)
if self.graph.add_edge(source, target, rel_type, confidence, provider_name, raw_data):
print(f"Added IP relationship: {source} -> {target} ({rel_type.relationship_name}) from {provider_name}")
if passive_entry not in metadata.get('passive_dns', []):
metadata.setdefault('passive_dns', []).append(passive_entry)
return discovered_hostnames
def _collect_ip_metadata(self, ip: str, provider_name: str, rel_type: RelationshipType,
target: str, raw_data: Dict[str, Any], metadata: Dict[str, Any]) -> None:
"""
Collect and organize metadata for an IP node based on provider responses.
"""
if provider_name == 'dns':
if rel_type == RelationshipType.PTR_RECORD:
reverse_entry = f"PTR: {target}"
if reverse_entry not in metadata['dns_records']:
metadata['dns_records'].append(reverse_entry)
if target not in metadata['hostnames']:
metadata['hostnames'].append(target)
elif provider_name == 'shodan':
# Merge Shodan data
for key, value in raw_data.items():
if key not in metadata['shodan'] or not metadata['shodan'][key]:
metadata['shodan'][key] = value
# Collect hostname information
if 'hostname' in raw_data and raw_data['hostname'] not in metadata['hostnames']:
metadata['hostnames'].append(raw_data['hostname'])
if 'hostnames' in raw_data:
for hostname in raw_data['hostnames']:
if hostname not in metadata['hostnames']:
metadata['hostnames'].append(hostname)
elif provider_name == 'virustotal':
# Merge VirusTotal data
for key, value in raw_data.items():
if key not in metadata['virustotal'] or not metadata['virustotal'][key]:
metadata['virustotal'][key] = value
# Add passive DNS entries
if rel_type == RelationshipType.PASSIVE_DNS:
passive_entry = f"Passive DNS: {target}"
if passive_entry not in metadata['passive_dns']:
metadata['passive_dns'].append(passive_entry)
# Handle ASN relationships
if rel_type == RelationshipType.ASN_MEMBERSHIP:
metadata['asn_data'] = {
'asn': target,
@@ -723,59 +512,57 @@ class Scanner:
'country': raw_data.get('country', '')
}
def _handle_large_entity(self, source: str, targets: list, rel_type: RelationshipType, provider_name: str):
"""
Handles the creation of a large entity node when a threshold is exceeded.
"""
print(f"Large number of {rel_type.name} relationships for {source}. Creating a large entity node.")
entity_name = f"Large collection of {rel_type.name} for {source}"
node_type = 'unknown'
if targets:
if _is_valid_domain(targets[0]):
node_type = 'domain'
elif _is_valid_ip(targets[0]):
node_type = 'ip'
self.graph.add_node(entity_name, NodeType.LARGE_ENTITY, metadata={"count": len(targets), "nodes": targets, "node_type": node_type})
self.graph.add_edge(source, entity_name, rel_type, 0.9, provider_name, {"info": "Aggregated node"})
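The metadata attached here is exactly what the frontend's showNodeModal later unpacks (count, nodes, node_type). For illustration only, a large-entity node might carry:

    # Illustrative values only; the keys match the add_node call above.
    large_entity_metadata = {
        "count": 57,                                  # number of aggregated targets
        "nodes": ["a.example.com", "b.example.com"],  # truncated for brevity
        "node_type": "domain",                        # 'domain' or 'ip', from the first target
    }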
def _safe_provider_query_domain(self, provider, domain: str):
"""Safely query provider for domain with error handling."""
def _safe_provider_query(self, provider, target: str, is_ip: bool) -> List[Tuple[str, str, RelationshipType, float, Dict[str, Any]]]:
"""Safely query a provider for a target with error handling."""
if self.stop_event.is_set():
return []
try:
return provider.query_domain(domain)
if is_ip:
return provider.query_ip(target)
else:
return provider.query_domain(target)
except Exception as e:
print(f"Provider {provider.get_name()} query_domain failed: {e}")
return []
def _safe_provider_query_ip(self, provider, ip: str):
"""Safely query provider for IP with error handling."""
if self.stop_event.is_set():
return []
try:
return provider.query_ip(ip)
except Exception as e:
print(f"Provider {provider.get_name()} query_ip failed: {e}")
print(f"Provider {provider.get_name()} query failed for {target}: {e}")
return []
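The unified helper assumes every provider exposes query_domain and query_ip with the same return shape: a list of (source, target, relationship_type, confidence, raw_data) tuples, as unpacked earlier in _query_providers_for_target. A minimal interface sketch under that assumption:

    from abc import ABC, abstractmethod
    from typing import Any, Dict, List, Tuple

    # (source, target, relationship_type, confidence, raw_data)
    Relationship = Tuple[str, str, 'RelationshipType', float, Dict[str, Any]]

    class BaseProvider(ABC):
        @abstractmethod
        def get_name(self) -> str: ...

        @abstractmethod
        def query_domain(self, domain: str) -> List[Relationship]: ...

        @abstractmethod
        def query_ip(self, ip: str) -> List[Relationship]: ...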
def stop_scan(self) -> bool:
"""
Request immediate scan termination with aggressive cancellation.
Request immediate scan termination.
Acts on the thread's liveness, not just the 'RUNNING' status.
"""
try:
if self.status == ScanStatus.RUNNING:
print("=== INITIATING IMMEDIATE SCAN TERMINATION ===")
# Signal all threads to stop
self.stop_event.set()
# Close HTTP sessions in all providers to terminate ongoing requests
for provider in self.providers:
try:
if hasattr(provider, 'session'):
provider.session.close()
print(f"Closed HTTP session for provider: {provider.get_name()}")
except Exception as e:
print(f"Error closing session for {provider.get_name()}: {e}")
# Shutdown executor immediately with cancel_futures=True
if self.executor:
print("Shutting down executor with immediate cancellation...")
self.executor.shutdown(wait=False, cancel_futures=True)
# Give threads a moment to respond to cancellation, then force status change
threading.Timer(2.0, self._force_stop_completion).start()
print("Immediate termination requested - ongoing requests will be cancelled")
return True
print("No active scan to stop")
return False
if not self.scan_thread or not self.scan_thread.is_alive():
print("No active scan thread to stop.")
# Cleanup state if inconsistent
if self.status == ScanStatus.RUNNING:
self.status = ScanStatus.STOPPED
return False
print("=== INITIATING IMMEDIATE SCAN TERMINATION ===")
self.status = ScanStatus.STOPPED
self.stop_event.set()
if self.executor:
print("Shutting down executor with immediate cancellation...")
self.executor.shutdown(wait=False, cancel_futures=True)
print("Termination signal sent. The scan thread will stop shortly.")
return True
except Exception as e:
print(f"ERROR: Exception in stop_scan: {e}")
traceback.print_exc()
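Note that shutdown(cancel_futures=True) exists only on Python 3.9+; older interpreters raise TypeError on the unknown keyword. A guarded variant, if older runtimes must be supported (sketch):

    import sys

    if sys.version_info >= (3, 9):
        executor.shutdown(wait=False, cancel_futures=True)
    else:
        executor.shutdown(wait=False)  # queued tasks will still run to completion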

View File

@@ -314,9 +314,39 @@ input[type="text"]:focus, select:focus {
.view-controls {
display: flex;
gap: 1.5rem;
align-items: center;
}
.filter-group {
display: flex;
align-items: center;
gap: 0.5rem;
}
.filter-group label {
font-size: 0.9rem;
color: #999;
}
.filter-group select,
.filter-group input[type="range"] {
background-color: #1a1a1a;
border: 1px solid #555;
color: #c7c7c7;
padding: 0.25rem 0.5rem;
}
.filter-group select {
max-width: 150px;
}
#confidence-value {
min-width: 30px;
text-align: center;
color: #00ff41;
}
.graph-container {
height: 800px;
position: relative;
@@ -905,4 +935,39 @@ input[type="text"]:focus, select:focus {
transform: translateX(100%);
opacity: 0;
}
}
/* dnsrecon/static/css/main.css */
/* ... (at the end of the file) */
.large-entity-nodes-list {
margin-top: 1rem;
}
.large-entity-node-details {
margin-bottom: 0.5rem;
border: 1px solid #333;
border-radius: 3px;
}
.large-entity-node-details summary {
padding: 0.5rem;
background-color: #3a3a3a;
cursor: pointer;
outline: none;
}
.large-entity-node-details summary:hover {
background-color: #4a4a4a;
}
.large-entity-node-details .detail-row {
margin-left: 1rem;
margin-right: 1rem;
}
.large-entity-node-details .detail-section-header {
margin-left: 1rem;
margin-right: 1rem;
}

View File

@@ -447,7 +447,7 @@ class GraphManager {
'ip': '#ff9900', // Amber
'asn': '#00aaff', // Blue
'large_entity': '#ff6b6b', // Red for large entities
'dns_record': '#999999'
'dns_record': '#9620c0ff'
};
return colors[nodeType] || '#ffffff';
}
@@ -463,7 +463,7 @@ class GraphManager {
'domain': '#00aa2e',
'ip': '#cc7700',
'asn': '#0088cc',
'dns_record': '#999999'
'dns_record': '#c235c9ff'
};
return borderColors[nodeType] || '#666666';
}

View File

@@ -54,6 +54,7 @@ class DNSReconApp {
targetDomain: document.getElementById('target-domain'),
maxDepth: document.getElementById('max-depth'),
startScan: document.getElementById('start-scan'),
addToGraph: document.getElementById('add-to-graph'),
stopScan: document.getElementById('stop-scan'),
exportResults: document.getElementById('export-results'),
configureApiKeys: document.getElementById('configure-api-keys'),
@@ -136,6 +137,11 @@ class DNSReconApp {
e.preventDefault();
this.startScan();
});
this.elements.addToGraph.addEventListener('click', (e) => {
e.preventDefault();
this.startScan(false);
});
this.elements.stopScan.addEventListener('click', (e) => {
console.log('Stop scan button clicked');
@@ -230,7 +236,7 @@ class DNSReconApp {
/**
* Start a reconnaissance scan
*/
async startScan() {
async startScan(clearGraph = true) {
console.log('=== STARTING SCAN ===');
try {
@@ -262,7 +268,8 @@
const requestData = {
target_domain: targetDomain,
max_depth: maxDepth
max_depth: maxDepth,
clear_graph: clearGraph
};
console.log('Request data:', requestData);
@@ -273,12 +280,12 @@
if (response.success) {
this.currentSessionId = response.scan_id;
console.log('Starting polling with session ID:', this.currentSessionId);
this.startPolling();
this.showSuccess('Reconnaissance scan started successfully');
// Clear previous graph
this.graphManager.clear();
if (clearGraph) {
this.graphManager.clear();
}
console.log(`Scan started for ${targetDomain} with depth ${maxDepth}`);
@@ -627,6 +634,10 @@ class DNSReconApp {
this.elements.startScan.disabled = true;
this.elements.startScan.classList.add('loading');
}
if (this.elements.addToGraph) {
this.elements.addToGraph.disabled = true;
this.elements.addToGraph.classList.add('loading');
}
if (this.elements.stopScan) {
this.elements.stopScan.disabled = false;
this.elements.stopScan.classList.remove('loading');
@@ -645,6 +656,10 @@ class DNSReconApp {
this.elements.startScan.disabled = false;
this.elements.startScan.classList.remove('loading');
}
if (this.elements.addToGraph) {
this.elements.addToGraph.disabled = false;
this.elements.addToGraph.classList.remove('loading');
}
if (this.elements.stopScan) {
this.elements.stopScan.disabled = true;
}
@@ -725,26 +740,21 @@
this.elements.providerList.appendChild(providerItem);
}
}
/**
* Show node details modal
* @param {string} nodeId - Node identifier
* @param {Object} node - Node data
* Generates the HTML for the node details view.
* @param {Object} node - The node object.
* @returns {string} The HTML string for the node details.
*/
showNodeModal(nodeId, node) {
if (!this.elements.nodeModal) return;
if (this.elements.modalTitle) {
this.elements.modalTitle.textContent = `Node Details: ${nodeId}`;
}
generateNodeDetailsHtml(node) {
if(!node) return '<div class="detail-row"><span class="detail-value">Details not available.</span></div>';
let detailsHtml = '';
const createDetailRow = (label, value, statusIcon = '') => {
const baseId = `detail-${label.replace(/[^a-zA-Z0-9]/g, '-')}`;
const baseId = `detail-${node.id.replace(/[^a-zA-Z0-9]/g, '-')}-${label.replace(/[^a-zA-Z0-9]/g, '-')}`;
if (value === null || value === undefined ||
(Array.isArray(value) && value.length === 0) ||
(typeof value === 'object' && Object.keys(value).length === 0)) {
(typeof value === 'object' && !Array.isArray(value) && Object.keys(value).length === 0)) {
return `
<div class="detail-row">
<span class="detail-label">${label} <span class="status-icon text-warning"></span></span>
@@ -779,11 +789,9 @@
};
const metadata = node.metadata || {};
// General Node Info
detailsHtml += createDetailRow('Node Type', node.type);
// Display data based on node type
detailsHtml += createDetailRow('Node Descriptor', node.id);
switch (node.type) {
case 'domain':
detailsHtml += createDetailRow('DNS Records', metadata.dns_records);
@@ -799,8 +807,7 @@
detailsHtml += createDetailRow('VirusTotal Data', metadata.virustotal);
break;
}
// Special handling for certificate data
if (metadata.certificate_data && Object.keys(metadata.certificate_data).length > 0) {
const cert = metadata.certificate_data;
detailsHtml += `<div class="detail-section-header">Certificate Summary</div>`;
@@ -817,8 +824,7 @@
detailsHtml += createDetailRow('Valid Until', new Date(cert.latest_certificate.not_after).toLocaleString());
}
}
// Special handling for ASN data
if (metadata.asn_data && Object.keys(metadata.asn_data).length > 0) {
detailsHtml += `<div class="detail-section-header">ASN Information</div>`;
detailsHtml += createDetailRow('ASN', metadata.asn_data.asn);
@@ -827,6 +833,44 @@
detailsHtml += createDetailRow('Country', metadata.asn_data.country);
}
return detailsHtml;
}
/**
* Show node details modal
* @param {string} nodeId - Node identifier
* @param {Object} node - Node data
*/
showNodeModal(nodeId, node) {
if (!this.elements.nodeModal) return;
if (this.elements.modalTitle) {
this.elements.modalTitle.textContent = `Node Details`;
}
let detailsHtml = '';
if (node.type === 'large_entity') {
const metadata = node.metadata || {};
const nodes = metadata.nodes || [];
const node_type = metadata.node_type || 'nodes';
detailsHtml += `<div class="detail-section-header">Contains ${metadata.count} ${node_type}s</div>`;
detailsHtml += '<div class="large-entity-nodes-list">';
for(const innerNodeId of nodes) {
const innerNode = this.graphManager.nodes.get(innerNodeId);
detailsHtml += `<details class="large-entity-node-details">`;
detailsHtml += `<summary>${innerNodeId}</summary>`;
detailsHtml += this.generateNodeDetailsHtml(innerNode);
detailsHtml += `</details>`;
}
detailsHtml += '</div>';
} else {
detailsHtml = this.generateNodeDetailsHtml(node);
}
if (this.elements.modalDetails) {
this.elements.modalDetails.innerHTML = detailsHtml;
}

View File

@@ -52,6 +52,10 @@
<span class="btn-icon">[RUN]</span>
<span>Start Reconnaissance</span>
</button>
<button id="add-to-graph" class="btn btn-primary">
<span class="btn-icon">[ADD]</span>
<span>Add to Graph</span>
</button>
<button id="stop-scan" class="btn btn-secondary" disabled>
<span class="btn-icon">[STOP]</span>
<span>Terminate Scan</span>
@@ -135,7 +139,7 @@
</div>
<div class="legend-item">
<div class="legend-color" style="background-color: #c7c7c7;"></div>
<span>Certificates</span>
<span>Domain (invalid cert)</span>
</div>
<div class="legend-item">
<div class="legend-color" style="background-color: #9d4edd;"></div>