diff --git a/.env.example b/.env.example index 6c05aa7..47ee018 100644 --- a/.env.example +++ b/.env.example @@ -25,10 +25,10 @@ DEFAULT_RECURSION_DEPTH=2 # Default timeout for provider API requests in seconds. DEFAULT_TIMEOUT=30 # The number of concurrent provider requests to make. -MAX_CONCURRENT_REQUESTS=5 +MAX_CONCURRENT_REQUESTS=1 # The number of results from a provider that triggers the "large entity" grouping. LARGE_ENTITY_THRESHOLD=100 # The number of times to retry a target if a provider fails. MAX_RETRIES_PER_TARGET=8 # How long cached provider responses are stored (in hours). -CACHE_EXPIRY_HOURS=12 +CACHE_TIMEOUT_HOURS=12 diff --git a/app.py b/app.py index b2233ff..ae2206b 100644 --- a/app.py +++ b/app.py @@ -10,46 +10,63 @@ import traceback from flask import Flask, render_template, request, jsonify, send_file, session from datetime import datetime, timezone, timedelta import io +import os from core.session_manager import session_manager from config import config from core.graph_manager import NodeType from utils.helpers import is_valid_target +from decimal import Decimal app = Flask(__name__) -# Use centralized configuration for Flask settings app.config['SECRET_KEY'] = config.flask_secret_key app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=config.flask_permanent_session_lifetime_hours) def get_user_scanner(): """ - Retrieves the scanner for the current session, or creates a new - session and scanner if one doesn't exist. + Retrieves the scanner for the current session, or creates a new one if none exists. """ - # Get current Flask session info for debugging current_flask_session_id = session.get('dnsrecon_session_id') - # Try to get existing session if current_flask_session_id: existing_scanner = session_manager.get_session(current_flask_session_id) if existing_scanner: return current_flask_session_id, existing_scanner - # Create new session if none exists - print("Creating new session as none was found...") new_session_id = session_manager.create_session() new_scanner = session_manager.get_session(new_session_id) if not new_scanner: raise Exception("Failed to create new scanner session") - # Store in Flask session session['dnsrecon_session_id'] = new_session_id session.permanent = True return new_session_id, new_scanner +class CustomJSONEncoder(json.JSONEncoder): + """Custom JSON encoder to handle non-serializable objects.""" + + def default(self, obj): + if isinstance(obj, datetime): + return obj.isoformat() + elif isinstance(obj, set): + return list(obj) + elif isinstance(obj, Decimal): + return float(obj) + elif hasattr(obj, '__dict__'): + # For custom objects, try to serialize their dict representation + try: + return obj.__dict__ + except: + return str(obj) + elif hasattr(obj, 'value') and hasattr(obj, 'name'): + # For enum objects + return obj.value + else: + # For any other non-serializable object, convert to string + return str(obj) @app.route('/') def index(): """Serve the main web interface.""" @@ -59,11 +76,8 @@ def index(): @app.route('/api/scan/start', methods=['POST']) def start_scan(): """ - Start a new reconnaissance scan. Creates a new isolated scanner if - clear_graph is true, otherwise adds to the existing one. + Starts a new reconnaissance scan. 
""" - print("=== API: /api/scan/start called ===") - try: data = request.get_json() if not data or 'target' not in data: @@ -72,47 +86,28 @@ def start_scan(): target = data['target'].strip() max_depth = data.get('max_depth', config.default_recursion_depth) clear_graph = data.get('clear_graph', True) - force_rescan_target = data.get('force_rescan_target', None) # **FIX**: Get the new parameter + force_rescan_target = data.get('force_rescan_target', None) - print(f"Parsed - target: '{target}', max_depth: {max_depth}, clear_graph: {clear_graph}, force_rescan: {force_rescan_target}") - - # Validation if not target: return jsonify({'success': False, 'error': 'Target cannot be empty'}), 400 if not is_valid_target(target): - return jsonify({'success': False, 'error': 'Invalid target format. Please enter a valid domain or IP address.'}), 400 + return jsonify({'success': False, 'error': 'Invalid target format.'}), 400 if not isinstance(max_depth, int) or not 1 <= max_depth <= 5: return jsonify({'success': False, 'error': 'Max depth must be an integer between 1 and 5'}), 400 - user_session_id, scanner = None, None - - if clear_graph: - print("Clear graph requested: Creating a new, isolated scanner session.") - old_session_id = session.get('dnsrecon_session_id') - if old_session_id: - session_manager.terminate_session(old_session_id) - - user_session_id = session_manager.create_session() - session['dnsrecon_session_id'] = user_session_id - session.permanent = True - scanner = session_manager.get_session(user_session_id) - else: - print("Adding to existing graph: Reusing the current scanner session.") - user_session_id, scanner = get_user_scanner() - + user_session_id, scanner = get_user_scanner() + if not scanner: - return jsonify({'success': False, 'error': 'Failed to get or create a scanner instance.'}), 500 + return jsonify({'success': False, 'error': 'Failed to get scanner instance.'}), 500 - print(f"Using scanner {id(scanner)} in session {user_session_id}") - - success = scanner.start_scan(target, max_depth, clear_graph=clear_graph, force_rescan_target=force_rescan_target) # **FIX**: Pass the new parameter + success = scanner.start_scan(target, max_depth, clear_graph=clear_graph, force_rescan_target=force_rescan_target) if success: return jsonify({ 'success': True, 'message': 'Scan started successfully', 'scan_id': scanner.logger.session_id, - 'user_session_id': user_session_id, + 'user_session_id': user_session_id }) else: return jsonify({ @@ -121,170 +116,98 @@ def start_scan(): }), 409 except Exception as e: - print(f"ERROR: Exception in start_scan endpoint: {e}") traceback.print_exc() return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500 - + @app.route('/api/scan/stop', methods=['POST']) def stop_scan(): - """Stop the current scan with immediate GUI feedback.""" - print("=== API: /api/scan/stop called ===") - + """Stop the current scan.""" try: - # Get user-specific scanner user_session_id, scanner = get_user_scanner() - print(f"Stopping scan for session: {user_session_id}") if not scanner: - return jsonify({ - 'success': False, - 'error': 'No scanner found for session' - }), 404 + return jsonify({'success': False, 'error': 'No scanner found for session'}), 404 - # Ensure session ID is set if not scanner.session_id: scanner.session_id = user_session_id - # Use the stop mechanism - success = scanner.stop_scan() - - # Also set the Redis stop signal directly for extra reliability + scanner.stop_scan() session_manager.set_stop_signal(user_session_id) - - # Force 
immediate status update session_manager.update_scanner_status(user_session_id, 'stopped') - - # Update the full scanner state session_manager.update_session_scanner(user_session_id, scanner) - print(f"Stop scan completed. Success: {success}, Scanner status: {scanner.status}") - return jsonify({ 'success': True, - 'message': 'Scan stop requested - termination initiated', - 'user_session_id': user_session_id, - 'scanner_status': scanner.status, - 'stop_method': 'cross_process' + 'message': 'Scan stop requested', + 'user_session_id': user_session_id }) except Exception as e: - print(f"ERROR: Exception in stop_scan endpoint: {e}") traceback.print_exc() - return jsonify({ - 'success': False, - 'error': f'Internal server error: {str(e)}' - }), 500 + return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500 @app.route('/api/scan/status', methods=['GET']) def get_scan_status(): - """Get current scan status with error handling.""" + """Get current scan status.""" try: - # Get user-specific scanner user_session_id, scanner = get_user_scanner() if not scanner: - # Return default idle status if no scanner return jsonify({ 'success': True, 'status': { - 'status': 'idle', - 'target_domain': None, - 'current_depth': 0, - 'max_depth': 0, - 'current_indicator': '', - 'total_indicators_found': 0, - 'indicators_processed': 0, - 'progress_percentage': 0.0, - 'enabled_providers': [], - 'graph_statistics': {}, + 'status': 'idle', 'target_domain': None, 'current_depth': 0, + 'max_depth': 0, 'progress_percentage': 0.0, 'user_session_id': user_session_id } }) - # Ensure session ID is set if not scanner.session_id: scanner.session_id = user_session_id status = scanner.get_scan_status() status['user_session_id'] = user_session_id - # Additional debug info - status['debug_info'] = { - 'scanner_object_id': id(scanner), - 'session_id_set': bool(scanner.session_id), - 'has_scan_thread': bool(scanner.scan_thread and scanner.scan_thread.is_alive()) - } - - return jsonify({ - 'success': True, - 'status': status - }) + return jsonify({'success': True, 'status': status}) except Exception as e: - print(f"ERROR: Exception in get_scan_status endpoint: {e}") traceback.print_exc() return jsonify({ - 'success': False, - 'error': f'Internal server error: {str(e)}', - 'fallback_status': { - 'status': 'error', - 'target_domain': None, - 'current_depth': 0, - 'max_depth': 0, - 'progress_percentage': 0.0 - } + 'success': False, 'error': f'Internal server error: {str(e)}', + 'fallback_status': {'status': 'error', 'progress_percentage': 0.0} }), 500 @app.route('/api/graph', methods=['GET']) def get_graph_data(): - """Get current graph data with error handling.""" + """Get current graph data.""" try: - # Get user-specific scanner user_session_id, scanner = get_user_scanner() - if not scanner: - # Return empty graph if no scanner - return jsonify({ - 'success': True, - 'graph': { - 'nodes': [], - 'edges': [], - 'statistics': { - 'node_count': 0, - 'edge_count': 0, - 'creation_time': datetime.now(timezone.utc).isoformat(), - 'last_modified': datetime.now(timezone.utc).isoformat() - } - }, - 'user_session_id': user_session_id - }) + empty_graph = { + 'nodes': [], 'edges': [], + 'statistics': {'node_count': 0, 'edge_count': 0} + } - graph_data = scanner.get_graph_data() - return jsonify({ - 'success': True, - 'graph': graph_data, - 'user_session_id': user_session_id - }) + if not scanner: + return jsonify({'success': True, 'graph': empty_graph, 'user_session_id': user_session_id}) + + graph_data = 
scanner.get_graph_data() or empty_graph + + return jsonify({'success': True, 'graph': graph_data, 'user_session_id': user_session_id}) except Exception as e: - print(f"ERROR: Exception in get_graph_data endpoint: {e}") traceback.print_exc() return jsonify({ - 'success': False, - 'error': f'Internal server error: {str(e)}', - 'fallback_graph': { - 'nodes': [], - 'edges': [], - 'statistics': {'node_count': 0, 'edge_count': 0} - } + 'success': False, 'error': f'Internal server error: {str(e)}', + 'fallback_graph': {'nodes': [], 'edges': [], 'statistics': {}} }), 500 @app.route('/api/graph/large-entity/extract', methods=['POST']) def extract_from_large_entity(): - """Extract a node from a large entity, making it a standalone node.""" + """Extract a node from a large entity.""" try: data = request.get_json() large_entity_id = data.get('large_entity_id') @@ -306,13 +229,12 @@ def extract_from_large_entity(): return jsonify({'success': False, 'error': f'Failed to extract node {node_id}.'}), 500 except Exception as e: - print(f"ERROR: Exception in extract_from_large_entity endpoint: {e}") traceback.print_exc() return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500 @app.route('/api/graph/node/', methods=['DELETE']) def delete_graph_node(node_id): - """Delete a node from the graph for the current user session.""" + """Delete a node from the graph.""" try: user_session_id, scanner = get_user_scanner() if not scanner: @@ -321,14 +243,12 @@ def delete_graph_node(node_id): success = scanner.graph.remove_node(node_id) if success: - # Persist the change session_manager.update_session_scanner(user_session_id, scanner) return jsonify({'success': True, 'message': f'Node {node_id} deleted successfully.'}) else: - return jsonify({'success': False, 'error': f'Node {node_id} not found in graph.'}), 404 + return jsonify({'success': False, 'error': f'Node {node_id} not found.'}), 404 except Exception as e: - print(f"ERROR: Exception in delete_graph_node endpoint: {e}") traceback.print_exc() return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500 @@ -349,7 +269,6 @@ def revert_graph_action(): action_data = data['data'] if action_type == 'delete': - # Re-add the node node_to_add = action_data.get('node') if node_to_add: scanner.graph.add_node( @@ -360,131 +279,147 @@ def revert_graph_action(): metadata=node_to_add.get('metadata') ) - # Re-add the edges edges_to_add = action_data.get('edges', []) for edge in edges_to_add: - # Add edge only if both nodes exist to prevent errors if scanner.graph.graph.has_node(edge['from']) and scanner.graph.graph.has_node(edge['to']): scanner.graph.add_edge( - source_id=edge['from'], - target_id=edge['to'], + source_id=edge['from'], target_id=edge['to'], relationship_type=edge['metadata']['relationship_type'], confidence_score=edge['metadata']['confidence_score'], source_provider=edge['metadata']['source_provider'], raw_data=edge.get('raw_data', {}) ) - # Persist the change session_manager.update_session_scanner(user_session_id, scanner) return jsonify({'success': True, 'message': 'Delete action reverted successfully.'}) return jsonify({'success': False, 'error': f'Unknown revert action type: {action_type}'}), 400 except Exception as e: - print(f"ERROR: Exception in revert_graph_action endpoint: {e}") traceback.print_exc() return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500 @app.route('/api/export', methods=['GET']) def export_results(): - """Export complete scan results as downloadable JSON 
for the user session.""" + """Export scan results as a JSON file with improved error handling.""" try: - # Get user-specific scanner user_session_id, scanner = get_user_scanner() - # Get complete results - results = scanner.export_results() + if not scanner: + return jsonify({'success': False, 'error': 'No active scanner session found'}), 404 - # Add session information to export + # Get export data with error handling + try: + results = scanner.export_results() + except Exception as e: + return jsonify({'success': False, 'error': f'Failed to gather export data: {str(e)}'}), 500 + + # Add export metadata results['export_metadata'] = { 'user_session_id': user_session_id, 'export_timestamp': datetime.now(timezone.utc).isoformat(), - 'export_type': 'user_session_results' + 'export_version': '1.0.0', + 'forensic_integrity': 'maintained' } - # Create filename with timestamp + # Generate filename with forensic naming convention timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S') target = scanner.current_target or 'unknown' - filename = f"dnsrecon_{target}_{timestamp}_{user_session_id[:8]}.json" + # Sanitize target for filename + safe_target = "".join(c for c in target if c.isalnum() or c in ('-', '_', '.')).rstrip() + filename = f"dnsrecon_{safe_target}_{timestamp}.json" - # Create in-memory file - json_data = json.dumps(results, indent=2, ensure_ascii=False) + # Serialize with custom encoder and error handling + try: + json_data = json.dumps(results, indent=2, cls=CustomJSONEncoder, ensure_ascii=False) + except Exception as e: + # If custom encoder fails, try a more aggressive approach + try: + # Convert problematic objects to strings recursively + cleaned_results = _clean_for_json(results) + json_data = json.dumps(cleaned_results, indent=2, ensure_ascii=False) + except Exception as e2: + return jsonify({ + 'success': False, + 'error': f'JSON serialization failed: {str(e2)}' + }), 500 + + # Create file object file_obj = io.BytesIO(json_data.encode('utf-8')) return send_file( - file_obj, + file_obj, as_attachment=True, - download_name=filename, + download_name=filename, mimetype='application/json' ) except Exception as e: - print(f"ERROR: Exception in export_results endpoint: {e}") traceback.print_exc() return jsonify({ - 'success': False, - 'error': f'Export failed: {str(e)}' + 'success': False, + 'error': f'Export failed: {str(e)}', + 'error_type': type(e).__name__ }), 500 - -@app.route('/api/providers', methods=['GET']) -def get_providers(): - """Get information about available providers for the user session.""" +def _clean_for_json(obj, max_depth=10, current_depth=0): + """ + Recursively clean an object to make it JSON serializable. + Handles circular references and problematic object types. 
+ """ + if current_depth > max_depth: + return f"" - try: - # Get user-specific scanner - user_session_id, scanner = get_user_scanner() - - if scanner: - # Updated debug print to be consistent with the new progress bar logic - completed_tasks = scanner.indicators_completed - total_tasks = scanner.total_tasks_ever_enqueued - print(f"DEBUG: Task Progress - Completed: {completed_tasks}, Total Enqueued: {total_tasks}") - else: - print("DEBUG: No active scanner session found.") - - provider_info = scanner.get_provider_info() - - return jsonify({ - 'success': True, - 'providers': provider_info, - 'user_session_id': user_session_id - }) - - except Exception as e: - print(f"ERROR: Exception in get_providers endpoint: {e}") - traceback.print_exc() - return jsonify({ - 'success': False, - 'error': f'Internal server error: {str(e)}' - }), 500 - + if obj is None or isinstance(obj, (bool, int, float, str)): + return obj + elif isinstance(obj, datetime): + return obj.isoformat() + elif isinstance(obj, (set, frozenset)): + return list(obj) + elif isinstance(obj, dict): + cleaned = {} + for key, value in obj.items(): + try: + # Ensure key is string + clean_key = str(key) if not isinstance(key, str) else key + cleaned[clean_key] = _clean_for_json(value, max_depth, current_depth + 1) + except Exception: + cleaned[str(key)] = f"" + return cleaned + elif isinstance(obj, (list, tuple)): + cleaned = [] + for item in obj: + try: + cleaned.append(_clean_for_json(item, max_depth, current_depth + 1)) + except Exception: + cleaned.append(f"") + return cleaned + elif hasattr(obj, '__dict__'): + try: + return _clean_for_json(obj.__dict__, max_depth, current_depth + 1) + except Exception: + return str(obj) + elif hasattr(obj, 'value'): + # For enum-like objects + return obj.value + else: + return str(obj) @app.route('/api/config/api-keys', methods=['POST']) def set_api_keys(): - """ - Set API keys for providers for the user session only. - """ + """Set API keys for the current session.""" try: data = request.get_json() - if data is None: - return jsonify({ - 'success': False, - 'error': 'No API keys provided' - }), 400 + return jsonify({'success': False, 'error': 'No API keys provided'}), 400 - # Get user-specific scanner and config user_session_id, scanner = get_user_scanner() session_config = scanner.config updated_providers = [] - # Iterate over the API keys provided in the request data for provider_name, api_key in data.items(): - # This allows us to both set and clear keys. The config - # handles enabling/disabling based on if the key is empty. api_key_value = str(api_key or '').strip() success = session_config.set_api_key(provider_name.lower(), api_key_value) @@ -492,60 +427,136 @@ def set_api_keys(): updated_providers.append(provider_name) if updated_providers: - # Reinitialize scanner providers to apply the new keys scanner._initialize_providers() - - # Persist the updated scanner object back to the user's session session_manager.update_session_scanner(user_session_id, scanner) return jsonify({ 'success': True, - 'message': f'API keys updated for session {user_session_id}: {", ".join(updated_providers)}', - 'updated_providers': updated_providers, + 'message': f'API keys updated for: {", ".join(updated_providers)}', 'user_session_id': user_session_id }) else: - return jsonify({ - 'success': False, - 'error': 'No valid API keys were provided or provider names were incorrect.' 
- }), 400 + return jsonify({'success': False, 'error': 'No valid API keys were provided.'}), 400 except Exception as e: - print(f"ERROR: Exception in set_api_keys endpoint: {e}") traceback.print_exc() + return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500 + +@app.route('/api/providers', methods=['GET']) +def get_providers(): + """Get enhanced information about available providers including API key sources.""" + try: + user_session_id, scanner = get_user_scanner() + base_provider_info = scanner.get_provider_info() + + # Enhance provider info with API key source information + enhanced_provider_info = {} + + for provider_name, info in base_provider_info.items(): + enhanced_info = dict(info) # Copy base info + + if info['requires_api_key']: + # Determine API key source and configuration status + api_key = scanner.config.get_api_key(provider_name) + backend_api_key = os.getenv(f'{provider_name.upper()}_API_KEY') + + if backend_api_key: + # API key configured via backend/environment + enhanced_info.update({ + 'api_key_configured': True, + 'api_key_source': 'backend', + 'api_key_help': f'API key configured via environment variable {provider_name.upper()}_API_KEY' + }) + elif api_key: + # API key configured via web interface + enhanced_info.update({ + 'api_key_configured': True, + 'api_key_source': 'frontend', + 'api_key_help': f'API key set via web interface (session-only)' + }) + else: + # No API key configured + enhanced_info.update({ + 'api_key_configured': False, + 'api_key_source': None, + 'api_key_help': f'Requires API key to enable {info["display_name"]} integration' + }) + else: + # Provider doesn't require API key + enhanced_info.update({ + 'api_key_configured': True, # Always "configured" for non-API providers + 'api_key_source': None, + 'api_key_help': None + }) + + enhanced_provider_info[provider_name] = enhanced_info + return jsonify({ - 'success': False, - 'error': f'Internal server error: {str(e)}' - }), 500 + 'success': True, + 'providers': enhanced_provider_info, + 'user_session_id': user_session_id + }) + + except Exception as e: + traceback.print_exc() + return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500 + + +@app.route('/api/config/providers', methods=['POST']) +def configure_providers(): + """Configure provider settings (enable/disable).""" + try: + data = request.get_json() + if data is None: + return jsonify({'success': False, 'error': 'No provider settings provided'}), 400 + + user_session_id, scanner = get_user_scanner() + session_config = scanner.config + + updated_providers = [] + + for provider_name, settings in data.items(): + provider_name_clean = provider_name.lower().strip() + + if 'enabled' in settings: + # Update the enabled state in session config + session_config.enabled_providers[provider_name_clean] = settings['enabled'] + updated_providers.append(provider_name_clean) + + if updated_providers: + # Reinitialize providers with new settings + scanner._initialize_providers() + session_manager.update_session_scanner(user_session_id, scanner) + + return jsonify({ + 'success': True, + 'message': f'Provider settings updated for: {", ".join(updated_providers)}', + 'user_session_id': user_session_id + }) + else: + return jsonify({'success': False, 'error': 'No valid provider settings were provided.'}), 400 + + except Exception as e: + traceback.print_exc() + return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500 + + @app.errorhandler(404) def not_found(error): """Handle 404 
errors.""" - return jsonify({ - 'success': False, - 'error': 'Endpoint not found' - }), 404 + return jsonify({'success': False, 'error': 'Endpoint not found'}), 404 @app.errorhandler(500) def internal_error(error): """Handle 500 errors.""" - print(f"ERROR: 500 Internal Server Error: {error}") traceback.print_exc() - return jsonify({ - 'success': False, - 'error': 'Internal server error' - }), 500 + return jsonify({'success': False, 'error': 'Internal server error'}), 500 if __name__ == '__main__': - print("Starting DNSRecon Flask application with user session support...") - - # Load configuration from environment config.load_from_env() - - # Start Flask application - print(f"Starting server on {config.flask_host}:{config.flask_port}") app.run( host=config.flask_host, port=config.flask_port, diff --git a/config.py b/config.py index a01eb24..3333846 100644 --- a/config.py +++ b/config.py @@ -21,11 +21,10 @@ class Config: # --- General Settings --- self.default_recursion_depth = 2 - self.default_timeout = 30 - self.max_concurrent_requests = 5 + self.default_timeout = 60 + self.max_concurrent_requests = 1 self.large_entity_threshold = 100 self.max_retries_per_target = 8 - self.cache_expiry_hours = 12 # --- Provider Caching Settings --- self.cache_timeout_hours = 6 # Provider-specific cache timeout @@ -69,7 +68,6 @@ class Config: self.max_concurrent_requests = int(os.getenv('MAX_CONCURRENT_REQUESTS', self.max_concurrent_requests)) self.large_entity_threshold = int(os.getenv('LARGE_ENTITY_THRESHOLD', self.large_entity_threshold)) self.max_retries_per_target = int(os.getenv('MAX_RETRIES_PER_TARGET', self.max_retries_per_target)) - self.cache_expiry_hours = int(os.getenv('CACHE_EXPIRY_HOURS', self.cache_expiry_hours)) self.cache_timeout_hours = int(os.getenv('CACHE_TIMEOUT_HOURS', self.cache_timeout_hours)) # Override Flask and session settings @@ -87,6 +85,60 @@ class Config: self.enabled_providers[provider] = True return True + def set_provider_enabled(self, provider: str, enabled: bool) -> bool: + """ + Set provider enabled status for the session. + + Args: + provider: Provider name + enabled: Whether the provider should be enabled + + Returns: + True if the setting was applied successfully + """ + provider_key = provider.lower() + self.enabled_providers[provider_key] = enabled + return True + + def get_provider_enabled(self, provider: str) -> bool: + """ + Get provider enabled status. + + Args: + provider: Provider name + + Returns: + True if the provider is enabled + """ + provider_key = provider.lower() + return self.enabled_providers.get(provider_key, True) # Default to enabled + + def bulk_set_provider_settings(self, provider_settings: dict) -> dict: + """ + Set multiple provider settings at once. 
+ + Args: + provider_settings: Dict of provider_name -> {'enabled': bool, ...} + + Returns: + Dict with results for each provider + """ + results = {} + + for provider_name, settings in provider_settings.items(): + provider_key = provider_name.lower() + + try: + if 'enabled' in settings: + self.enabled_providers[provider_key] = settings['enabled'] + results[provider_key] = {'success': True, 'enabled': settings['enabled']} + else: + results[provider_key] = {'success': False, 'error': 'No enabled setting provided'} + except Exception as e: + results[provider_key] = {'success': False, 'error': str(e)} + + return results + def get_api_key(self, provider: str) -> Optional[str]: """Get API key for a provider.""" return self.api_keys.get(provider) diff --git a/core/graph_manager.py b/core/graph_manager.py index 6086ea8..27a2a89 100644 --- a/core/graph_manager.py +++ b/core/graph_manager.py @@ -1,8 +1,10 @@ -# core/graph_manager.py +# dnsrecon-reduced/core/graph_manager.py """ Graph data model for DNSRecon using NetworkX. Manages in-memory graph storage with confidence scoring and forensic metadata. +Now fully compatible with the unified ProviderResult data model. +UPDATED: Fixed correlation exclusion keys to match actual attribute names. """ import re from datetime import datetime, timezone @@ -16,7 +18,8 @@ class NodeType(Enum): """Enumeration of supported node types.""" DOMAIN = "domain" IP = "ip" - ASN = "asn" + ISP = "isp" + CA = "ca" LARGE_ENTITY = "large_entity" CORRELATION_OBJECT = "correlation_object" @@ -28,6 +31,7 @@ class GraphManager: """ Thread-safe graph manager for DNSRecon infrastructure mapping. Uses NetworkX for in-memory graph storage with confidence scoring. + Compatible with unified ProviderResult data model. """ def __init__(self): @@ -38,6 +42,31 @@ class GraphManager: self.correlation_index = {} # Compile regex for date filtering for efficiency self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}') + + # FIXED: Exclude cert_issuer_name since we already create proper CA relationships + self.EXCLUDED_KEYS = [ + # Certificate metadata that creates noise or has dedicated node types + 'cert_source', # Always 'crtsh' for crtsh provider + 'cert_common_name', + 'cert_validity_period_days', # Numerical, not useful for correlation + 'cert_issuer_name', # FIXED: Has dedicated CA nodes, don't correlate + #'cert_certificate_id', # Unique per certificate + #'cert_serial_number', # Unique per certificate + 'cert_entry_timestamp', # Timestamp, filtered by date regex anyway + 'cert_not_before', # Date, filtered by date regex anyway + 'cert_not_after', # Date, filtered by date regex anyway + # DNS metadata that creates noise + 'dns_ttl', # TTL values are not meaningful for correlation + # Shodan metadata that might create noise + 'timestamp', # Generic timestamp fields + 'last_update', # Generic timestamp fields + #'org', # Too generic, causes false correlations + #'isp', # Too generic, causes false correlations + # Generic noisy attributes + 'updated_timestamp', # Any timestamp field + 'discovery_timestamp', # Any timestamp field + 'query_timestamp', # Any timestamp field + ] def __getstate__(self): """Prepare GraphManager for pickling, excluding compiled regex.""" @@ -52,245 +81,138 @@ class GraphManager: self.__dict__.update(state) self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}') - def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = [], parent_attr: str = ""): - """Recursively traverse metadata and add hashable 
values to the index with better path tracking.""" - if path is None: - path = [] - - if isinstance(data, dict): - for key, value in data.items(): - self._update_correlation_index(node_id, value, path + [key], key) - elif isinstance(data, list): - for i, item in enumerate(data): - # Instead of just using [i], include the parent attribute context - list_path_component = f"[{i}]" if not parent_attr else f"{parent_attr}[{i}]" - self._update_correlation_index(node_id, item, path + [list_path_component], parent_attr) - else: - self._add_to_correlation_index(node_id, data, ".".join(path), parent_attr) - - def _add_to_correlation_index(self, node_id: str, value: Any, path_str: str, parent_attr: str = ""): - """Add a hashable value to the correlation index, filtering out noise.""" - if not isinstance(value, (str, int, float, bool)) or value is None: + def process_correlations_for_node(self, node_id: str): + """ + UPDATED: Process correlations for a given node with enhanced tracking. + Now properly tracks which attribute/provider created each correlation. + """ + if not self.graph.has_node(node_id): return - # Ignore certain paths that contain noisy, non-unique identifiers - if any(keyword in path_str.lower() for keyword in ['count', 'total', 'timestamp', 'date']): - return - - # Filter out common low-entropy values and date-like strings - if isinstance(value, str): - # FIXED: Prevent correlation on date/time strings. - if self.date_pattern.match(value): - return - if len(value) < 4 or value.lower() in ['true', 'false', 'unknown', 'none', 'crt.sh']: - return - elif isinstance(value, int) and (abs(value) < 1024 or abs(value) > 65535): - return # Ignore small integers and common port numbers - elif isinstance(value, bool): - return # Ignore boolean values - - # Add the valuable correlation data to the index - if value not in self.correlation_index: - self.correlation_index[value] = {} - if node_id not in self.correlation_index[value]: - self.correlation_index[value][node_id] = [] + node_attributes = self.graph.nodes[node_id].get('attributes', []) - # Store both the full path and the parent attribute for better edge labeling - correlation_entry = { - 'path': path_str, - 'parent_attr': parent_attr, - 'meaningful_attr': self._extract_meaningful_attribute(path_str, parent_attr) - } - - if correlation_entry not in self.correlation_index[value][node_id]: - self.correlation_index[value][node_id].append(correlation_entry) + # Process each attribute for potential correlations + for attr in node_attributes: + attr_name = attr.get('name') + attr_value = attr.get('value') + attr_provider = attr.get('provider', 'unknown') - def _extract_meaningful_attribute(self, path_str: str, parent_attr: str = "") -> str: - """Extract the most meaningful attribute name from a path string.""" - if not path_str: - return "unknown" - - path_parts = path_str.split('.') - - # Look for the last non-array-index part - for part in reversed(path_parts): - # Skip array indices like [0], [1], etc. 
- if not (part.startswith('[') and part.endswith(']') and part[1:-1].isdigit()): - # Clean up compound names like "hostnames[0]" to just "hostnames" - clean_part = re.sub(r'\[\d+\]$', '', part) - if clean_part: - return clean_part - - # Fallback to parent attribute if available - if parent_attr: - return parent_attr - - # Last resort - use the first meaningful part - for part in path_parts: - if not (part.startswith('[') and part.endswith(']') and part[1:-1].isdigit()): - clean_part = re.sub(r'\[\d+\]$', '', part) - if clean_part: - return clean_part - - return "correlation" + # IMPROVED: More comprehensive exclusion logic + should_exclude = ( + # Check against excluded keys (exact match or substring) + any(excluded_key in attr_name or attr_name == excluded_key for excluded_key in self.EXCLUDED_KEYS) or + # Invalid value types + not isinstance(attr_value, (str, int, float, bool)) or + attr_value is None or + # Boolean values are not useful for correlation + isinstance(attr_value, bool) or + # String values that are too short or are dates + (isinstance(attr_value, str) and ( + len(attr_value) < 4 or + self.date_pattern.match(attr_value) or + # Exclude common generic values that create noise + attr_value.lower() in ['unknown', 'none', 'null', 'n/a', 'true', 'false', '0', '1'] + )) or + # Numerical values that are likely to be unique identifiers + (isinstance(attr_value, (int, float)) and ( + attr_value == 0 or # Zero values are not meaningful + attr_value == 1 or # One values are too common + abs(attr_value) > 1000000 # Very large numbers are likely IDs + )) + ) - def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = [], parent_attr: str = "") -> List[Dict]: - """Recursively traverse metadata to find correlations with existing data.""" - if path is None: - path = [] + if should_exclude: + continue - all_correlations = [] - if isinstance(data, dict): - for key, value in data.items(): - if key == 'source': # Avoid correlating on the provider name - continue - all_correlations.extend(self._check_for_correlations(new_node_id, value, path + [key], key)) - elif isinstance(data, list): - for i, item in enumerate(data): - list_path_component = f"[{i}]" if not parent_attr else f"{parent_attr}[{i}]" - all_correlations.extend(self._check_for_correlations(new_node_id, item, path + [list_path_component], parent_attr)) - else: - value = data - if value in self.correlation_index: - existing_nodes_with_paths = self.correlation_index[value] - unique_nodes = set(existing_nodes_with_paths.keys()) - unique_nodes.add(new_node_id) - - if len(unique_nodes) < 2: - return all_correlations # Correlation must involve at least two distinct nodes - - new_source = { - 'node_id': new_node_id, - 'path': ".".join(path), - 'parent_attr': parent_attr, - 'meaningful_attr': self._extract_meaningful_attribute(".".join(path), parent_attr) + # Initialize correlation tracking for this value + if attr_value not in self.correlation_index: + self.correlation_index[attr_value] = { + 'nodes': set(), + 'sources': [] # Track which provider/attribute combinations contributed } - all_sources = [new_source] - - for node_id, path_entries in existing_nodes_with_paths.items(): - for entry in path_entries: - if isinstance(entry, dict): - all_sources.append({ - 'node_id': node_id, - 'path': entry['path'], - 'parent_attr': entry.get('parent_attr', ''), - 'meaningful_attr': entry.get('meaningful_attr', self._extract_meaningful_attribute(entry['path'], entry.get('parent_attr', ''))) - }) - else: - # Handle legacy 
string-only entries - all_sources.append({ - 'node_id': node_id, - 'path': str(entry), - 'parent_attr': '', - 'meaningful_attr': self._extract_meaningful_attribute(str(entry)) - }) - all_correlations.append({ - 'value': value, - 'sources': all_sources, - 'nodes': list(unique_nodes) - }) - return all_correlations + # Add this node and source information + self.correlation_index[attr_value]['nodes'].add(node_id) + + # Track the source of this correlation value + source_info = { + 'node_id': node_id, + 'provider': attr_provider, + 'attribute': attr_name, + 'path': f"{attr_provider}_{attr_name}" + } + + # Add source if not already present (avoid duplicates) + existing_sources = [s for s in self.correlation_index[attr_value]['sources'] + if s['node_id'] == node_id and s['path'] == source_info['path']] + if not existing_sources: + self.correlation_index[attr_value]['sources'].append(source_info) - def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[Dict[str, Any]] = None, - description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool: - """Add a node to the graph, update attributes, and process correlations.""" - is_new_node = not self.graph.has_node(node_id) - if is_new_node: - self.graph.add_node(node_id, type=node_type.value, - added_timestamp=datetime.now(timezone.utc).isoformat(), - attributes=attributes or {}, - description=description, - metadata=metadata or {}) - else: - # Safely merge new attributes into existing attributes - if attributes: - existing_attributes = self.graph.nodes[node_id].get('attributes', {}) - existing_attributes.update(attributes) - self.graph.nodes[node_id]['attributes'] = existing_attributes - if description: - self.graph.nodes[node_id]['description'] = description - if metadata: - existing_metadata = self.graph.nodes[node_id].get('metadata', {}) - existing_metadata.update(metadata) - self.graph.nodes[node_id]['metadata'] = existing_metadata + # Create correlation node if we have multiple nodes with this value + if len(self.correlation_index[attr_value]['nodes']) > 1: + self._create_enhanced_correlation_node_and_edges(attr_value, self.correlation_index[attr_value]) - if attributes and node_type != NodeType.CORRELATION_OBJECT: - correlations = self._check_for_correlations(node_id, attributes) - for corr in correlations: - value = corr['value'] - - # STEP 1: Substring check against all existing nodes - if self._correlation_value_matches_existing_node(value): - # Skip creating correlation node - would be redundant - continue - - eligible_nodes = set(corr['nodes']) - - if len(eligible_nodes) < 2: - # Need at least 2 nodes to create a correlation - continue - - # STEP 3: Check for existing correlation node with same connection pattern - correlation_nodes_with_pattern = self._find_correlation_nodes_with_same_pattern(eligible_nodes) - - if correlation_nodes_with_pattern: - # STEP 4: Merge with existing correlation node - target_correlation_node = correlation_nodes_with_pattern[0] - self._merge_correlation_values(target_correlation_node, value, corr) - else: - # STEP 5: Create new correlation node for eligible nodes only - correlation_node_id = f"corr_{abs(hash(str(sorted(eligible_nodes))))}" - self.add_node(correlation_node_id, NodeType.CORRELATION_OBJECT, - metadata={'values': [value], 'sources': corr['sources'], - 'correlated_nodes': list(eligible_nodes)}) - - # Create edges from eligible nodes to this correlation node with better labeling - for c_node_id in eligible_nodes: - if self.graph.has_node(c_node_id): - # Find the best 
attribute name for this node - meaningful_attr = self._find_best_attribute_name_for_node(c_node_id, corr['sources']) - relationship_type = f"c_{meaningful_attr}" - self.add_edge(c_node_id, correlation_node_id, relationship_type, confidence_score=0.9) - - self._update_correlation_index(node_id, attributes) - - self.last_modified = datetime.now(timezone.utc).isoformat() - return is_new_node - - def _find_best_attribute_name_for_node(self, node_id: str, sources: List[Dict]) -> str: - """Find the best attribute name for a correlation edge by looking at the sources.""" - node_sources = [s for s in sources if s['node_id'] == node_id] + def _create_enhanced_correlation_node_and_edges(self, value, correlation_data): + """ + UPDATED: Create correlation node and edges with raw provider data (no formatting). + """ + correlation_node_id = f"corr_{hash(str(value)) & 0x7FFFFFFF}" + nodes = correlation_data['nodes'] + sources = correlation_data['sources'] - if not node_sources: - return "correlation" - - # Use the meaningful_attr if available - for source in node_sources: - meaningful_attr = source.get('meaningful_attr') - if meaningful_attr and meaningful_attr != "unknown": - return meaningful_attr - - # Fallback to parent_attr - for source in node_sources: - parent_attr = source.get('parent_attr') - if parent_attr: - return parent_attr - - # Last resort - extract from path - for source in node_sources: - path = source.get('path', '') - if path: - extracted = self._extract_meaningful_attribute(path) - if extracted != "unknown": - return extracted - - return "correlation" + # Create or update correlation node + if not self.graph.has_node(correlation_node_id): + # Use raw provider/attribute data - no formatting + provider_counts = {} + for source in sources: + # Keep original provider and attribute names + key = f"{source['provider']}_{source['attribute']}" + provider_counts[key] = provider_counts.get(key, 0) + 1 + + # Use the most common provider/attribute as the primary label (raw) + primary_source = max(provider_counts.items(), key=lambda x: x[1])[0] if provider_counts else "unknown_correlation" + + metadata = { + 'value': value, + 'correlated_nodes': list(nodes), + 'sources': sources, + 'primary_source': primary_source, + 'correlation_count': len(nodes) + } + + self.add_node(correlation_node_id, NodeType.CORRELATION_OBJECT, metadata=metadata) + #print(f"Created correlation node {correlation_node_id} for value '{value}' with {len(nodes)} nodes") + + # Create edges from each node to the correlation node + for source in sources: + node_id = source['node_id'] + provider = source['provider'] + attribute = source['attribute'] + + if self.graph.has_node(node_id) and not self.graph.has_edge(node_id, correlation_node_id): + # Format relationship label as "corr_provider_attribute" + relationship_label = f"corr_{provider}_{attribute}" + + self.add_edge( + source_id=node_id, + target_id=correlation_node_id, + relationship_type=relationship_label, + confidence_score=0.9, + source_provider=provider, + raw_data={ + 'correlation_value': value, + 'original_attribute': attribute, + 'correlation_type': 'attribute_matching' + } + ) + + #print(f"Added correlation edge: {node_id} -> {correlation_node_id} ({relationship_label})") + def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool: """ Check if there's a direct edge between two nodes in either direction. - Returns True if node_a→node_b OR node_b→node_a exists. + Returns True if node_aâ†'node_b OR node_bâ†'node_a exists. 
""" return (self.graph.has_edge(node_a, node_b) or self.graph.has_edge(node_b, node_a)) @@ -382,19 +304,60 @@ class GraphManager: f"across {node_count} nodes" ) + def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[List[Dict[str, Any]]] = None, + description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool: + """ + Add a node to the graph, update attributes, and process correlations. + Now compatible with unified data model - attributes are dictionaries from converted StandardAttribute objects. + """ + is_new_node = not self.graph.has_node(node_id) + if is_new_node: + self.graph.add_node(node_id, type=node_type.value, + added_timestamp=datetime.now(timezone.utc).isoformat(), + attributes=attributes or [], # Store as a list from the start + description=description, + metadata=metadata or {}) + else: + # Safely merge new attributes into the existing list of attributes + if attributes: + existing_attributes = self.graph.nodes[node_id].get('attributes', []) + + # Handle cases where old data might still be in dictionary format + if not isinstance(existing_attributes, list): + existing_attributes = [] + + # Create a set of existing attribute names for efficient duplicate checking + existing_attr_names = {attr['name'] for attr in existing_attributes} + + for new_attr in attributes: + if new_attr['name'] not in existing_attr_names: + existing_attributes.append(new_attr) + existing_attr_names.add(new_attr['name']) + + self.graph.nodes[node_id]['attributes'] = existing_attributes + if description: + self.graph.nodes[node_id]['description'] = description + if metadata: + existing_metadata = self.graph.nodes[node_id].get('metadata', {}) + existing_metadata.update(metadata) + self.graph.nodes[node_id]['metadata'] = existing_metadata + + self.last_modified = datetime.now(timezone.utc).isoformat() + return is_new_node + def add_edge(self, source_id: str, target_id: str, relationship_type: str, - confidence_score: float = 0.5, source_provider: str = "unknown", - raw_data: Optional[Dict[str, Any]] = None) -> bool: - """Add or update an edge between two nodes, ensuring nodes exist.""" + confidence_score: float = 0.5, source_provider: str = "unknown", + raw_data: Optional[Dict[str, Any]] = None) -> bool: + """ + UPDATED: Add or update an edge between two nodes with raw relationship labels. + """ if not self.graph.has_node(source_id) or not self.graph.has_node(target_id): return False new_confidence = confidence_score - if relationship_type.startswith("c_"): - edge_label = relationship_type - else: - edge_label = f"{source_provider}_{relationship_type}" + # UPDATED: Use raw relationship type - no formatting + edge_label = relationship_type if self.graph.has_edge(source_id, target_id): # If edge exists, update confidence if the new score is higher. @@ -404,7 +367,7 @@ class GraphManager: self.graph.edges[source_id, target_id]['updated_by'] = source_provider return False - # Add a new edge with all attributes. + # Add a new edge with raw attributes self.graph.add_edge(source_id, target_id, relationship_type=edge_label, confidence_score=new_confidence, @@ -413,7 +376,7 @@ class GraphManager: raw_data=raw_data or {}) self.last_modified = datetime.now(timezone.utc).isoformat() return True - + def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool: """ Removes a node from a large entity's internal lists and updates its count. 
@@ -423,13 +386,19 @@ class GraphManager: return False node_data = self.graph.nodes[large_entity_id] - attributes = node_data.get('attributes', {}) + attributes = node_data.get('attributes', []) + + # Find the 'nodes' attribute dictionary in the list + nodes_attr = next((attr for attr in attributes if attr.get('name') == 'nodes'), None) # Remove from the list of member nodes - if 'nodes' in attributes and node_id_to_extract in attributes['nodes']: - attributes['nodes'].remove(node_id_to_extract) - # Update the count - attributes['count'] = len(attributes['nodes']) + if nodes_attr and 'value' in nodes_attr and isinstance(nodes_attr['value'], list) and node_id_to_extract in nodes_attr['value']: + nodes_attr['value'].remove(node_id_to_extract) + + # Find the 'count' attribute and update it + count_attr = next((attr for attr in attributes if attr.get('name') == 'count'), None) + if count_attr: + count_attr['value'] = len(nodes_attr['value']) else: # This can happen if the node was already extracted, which is not an error. print(f"Warning: Node {node_id_to_extract} not found in the 'nodes' list of {large_entity_id}.") @@ -448,11 +417,21 @@ class GraphManager: # Clean up the correlation index keys_to_delete = [] - for value, nodes in self.correlation_index.items(): - if node_id in nodes: - del nodes[node_id] - if not nodes: # If no other nodes are associated with this value, remove it - keys_to_delete.append(value) + for value, data in self.correlation_index.items(): + if isinstance(data, dict) and 'nodes' in data: + # Updated correlation structure + if node_id in data['nodes']: + data['nodes'].discard(node_id) + # Remove sources for this node + data['sources'] = [s for s in data['sources'] if s['node_id'] != node_id] + if not data['nodes']: # If no other nodes are associated, remove it + keys_to_delete.append(value) + else: + # Legacy correlation structure (fallback) + if isinstance(data, set) and node_id in data: + data.discard(node_id) + if not data: + keys_to_delete.append(value) for key in keys_to_delete: if key in self.correlation_index: @@ -473,54 +452,59 @@ class GraphManager: """Get all nodes of a specific type.""" return [n for n, d in self.graph.nodes(data=True) if d.get('type') == node_type.value] - def get_neighbors(self, node_id: str) -> List[str]: - """Get all unique neighbors (predecessors and successors) for a node.""" - if not self.graph.has_node(node_id): - return [] - return list(set(self.graph.predecessors(node_id)) | set(self.graph.successors(node_id))) - def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]: """Get edges with confidence score above a given threshold.""" return [(u, v, d) for u, v, d in self.graph.edges(data=True) if d.get('confidence_score', 0) >= min_confidence] def get_graph_data(self) -> Dict[str, Any]: - """Export graph data formatted for frontend visualization.""" + """ + Export graph data formatted for frontend visualization. + SIMPLIFIED: No certificate styling - frontend handles all visual styling. 
+ """ nodes = [] for node_id, attrs in self.graph.nodes(data=True): - node_data = {'id': node_id, 'label': node_id, 'type': attrs.get('type', 'unknown'), - 'attributes': attrs.get('attributes', {}), - 'description': attrs.get('description', ''), - 'metadata': attrs.get('metadata', {}), - 'added_timestamp': attrs.get('added_timestamp')} - # Customize node appearance based on type and attributes - node_type = node_data['type'] - attributes = node_data['attributes'] - if node_type == 'domain' and attributes.get('certificates', {}).get('has_valid_cert') is False: - node_data['color'] = {'background': '#c7c7c7', 'border': '#999'} # Gray for invalid cert + node_data = { + 'id': node_id, + 'label': node_id, + 'type': attrs.get('type', 'unknown'), + 'attributes': attrs.get('attributes', []), # Raw attributes list + 'description': attrs.get('description', ''), + 'metadata': attrs.get('metadata', {}), + 'added_timestamp': attrs.get('added_timestamp') + } # Add incoming and outgoing edges to node data if self.graph.has_node(node_id): - node_data['incoming_edges'] = [{'from': u, 'data': d} for u, _, d in self.graph.in_edges(node_id, data=True)] - node_data['outgoing_edges'] = [{'to': v, 'data': d} for _, v, d in self.graph.out_edges(node_id, data=True)] + node_data['incoming_edges'] = [ + {'from': u, 'data': d} for u, _, d in self.graph.in_edges(node_id, data=True) + ] + node_data['outgoing_edges'] = [ + {'to': v, 'data': d} for _, v, d in self.graph.out_edges(node_id, data=True) + ] nodes.append(node_data) edges = [] for source, target, attrs in self.graph.edges(data=True): - edges.append({'from': source, 'to': target, - 'label': attrs.get('relationship_type', ''), - 'confidence_score': attrs.get('confidence_score', 0), - 'source_provider': attrs.get('source_provider', ''), - 'discovery_timestamp': attrs.get('discovery_timestamp')}) + edges.append({ + 'from': source, + 'to': target, + 'label': attrs.get('relationship_type', ''), + 'confidence_score': attrs.get('confidence_score', 0), + 'source_provider': attrs.get('source_provider', ''), + 'discovery_timestamp': attrs.get('discovery_timestamp') + }) + return { - 'nodes': nodes, 'edges': edges, + 'nodes': nodes, + 'edges': edges, 'statistics': self.get_statistics()['basic_metrics'] } def export_json(self) -> Dict[str, Any]: """Export complete graph data as a JSON-serializable dictionary.""" - graph_data = nx.node_link_data(self.graph) # Use NetworkX's built-in robust serializer + graph_data = nx.node_link_data(self.graph, edges="edges") return { 'export_metadata': { 'export_timestamp': datetime.now(timezone.utc).isoformat(), @@ -528,15 +512,20 @@ class GraphManager: 'last_modified': self.last_modified, 'total_nodes': self.get_node_count(), 'total_edges': self.get_edge_count(), - 'graph_format': 'dnsrecon_v1_nodeling' + 'graph_format': 'dnsrecon_v1_unified_model' }, 'graph': graph_data, 'statistics': self.get_statistics() } def _get_confidence_distribution(self) -> Dict[str, int]: - """Get distribution of edge confidence scores.""" + """Get distribution of edge confidence scores with empty graph handling.""" distribution = {'high': 0, 'medium': 0, 'low': 0} + + # FIXED: Handle empty graph case + if self.get_edge_count() == 0: + return distribution + for _, _, data in self.graph.edges(data=True): confidence = data.get('confidence_score', 0) if confidence >= 0.8: @@ -548,22 +537,42 @@ class GraphManager: return distribution def get_statistics(self) -> Dict[str, Any]: - """Get comprehensive statistics about the graph.""" - stats = {'basic_metrics': 
{'total_nodes': self.get_node_count(), - 'total_edges': self.get_edge_count(), - 'creation_time': self.creation_time, - 'last_modified': self.last_modified}, - 'node_type_distribution': {}, 'relationship_type_distribution': {}, - 'confidence_distribution': self._get_confidence_distribution(), - 'provider_distribution': {}} - # Calculate distributions - for node_type in NodeType: - stats['node_type_distribution'][node_type.value] = self.get_nodes_by_type(node_type).__len__() - for _, _, data in self.graph.edges(data=True): - rel_type = data.get('relationship_type', 'unknown') - stats['relationship_type_distribution'][rel_type] = stats['relationship_type_distribution'].get(rel_type, 0) + 1 - provider = data.get('source_provider', 'unknown') - stats['provider_distribution'][provider] = stats['provider_distribution'].get(provider, 0) + 1 + """Get comprehensive statistics about the graph with proper empty graph handling.""" + + # FIXED: Handle empty graph case properly + node_count = self.get_node_count() + edge_count = self.get_edge_count() + + stats = { + 'basic_metrics': { + 'total_nodes': node_count, + 'total_edges': edge_count, + 'creation_time': self.creation_time, + 'last_modified': self.last_modified + }, + 'node_type_distribution': {}, + 'relationship_type_distribution': {}, + 'confidence_distribution': self._get_confidence_distribution(), + 'provider_distribution': {} + } + + # FIXED: Only calculate distributions if we have data + if node_count > 0: + # Calculate node type distributions + for node_type in NodeType: + count = len(self.get_nodes_by_type(node_type)) + if count > 0: # Only include types that exist + stats['node_type_distribution'][node_type.value] = count + + if edge_count > 0: + # Calculate edge distributions + for _, _, data in self.graph.edges(data=True): + rel_type = data.get('relationship_type', 'unknown') + stats['relationship_type_distribution'][rel_type] = stats['relationship_type_distribution'].get(rel_type, 0) + 1 + + provider = data.get('source_provider', 'unknown') + stats['provider_distribution'][provider] = stats['provider_distribution'].get(provider, 0) + 1 + return stats def clear(self) -> None: diff --git a/core/logger.py b/core/logger.py index f391030..e774a2d 100644 --- a/core/logger.py +++ b/core/logger.py @@ -152,7 +152,7 @@ class ForensicLogger: # Log to standard logger if error: - self.logger.error(f"API Request Failed - {provider}: {url} - {error}") + self.logger.error(f"API Request Failed.") else: self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}") @@ -197,7 +197,7 @@ class ForensicLogger: self.logger.info(f"Scan Started - Target: {target_domain}, Depth: {recursion_depth}") self.logger.info(f"Enabled Providers: {', '.join(enabled_providers)}") - self.session_metadata['target_domains'].add(target_domain) + self.session_metadata['target_domains'].update(target_domain) def log_scan_complete(self) -> None: """Log the completion of a reconnaissance scan.""" diff --git a/core/provider_result.py b/core/provider_result.py new file mode 100644 index 0000000..7355cf4 --- /dev/null +++ b/core/provider_result.py @@ -0,0 +1,107 @@ +# dnsrecon-reduced/core/provider_result.py + +""" +Unified data model for DNSRecon passive reconnaissance. +Standardizes the data structure across all providers to ensure consistent processing. 
+""" + +from typing import Any, Optional, List, Dict +from dataclasses import dataclass, field +from datetime import datetime, timezone + + +@dataclass +class StandardAttribute: + """A unified data structure for a single piece of information about a node.""" + target_node: str + name: str + value: Any + type: str + provider: str + confidence: float + timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + metadata: Optional[Dict[str, Any]] = field(default_factory=dict) + + def __post_init__(self): + """Validate the attribute after initialization.""" + if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0: + raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}") + + +@dataclass +class Relationship: + """A unified data structure for a directional link between two nodes.""" + source_node: str + target_node: str + relationship_type: str + confidence: float + provider: str + timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + raw_data: Optional[Dict[str, Any]] = field(default_factory=dict) + + def __post_init__(self): + """Validate the relationship after initialization.""" + if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0: + raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}") + + +@dataclass +class ProviderResult: + """A container for all data returned by a provider from a single query.""" + attributes: List[StandardAttribute] = field(default_factory=list) + relationships: List[Relationship] = field(default_factory=list) + + def add_attribute(self, target_node: str, name: str, value: Any, attr_type: str, + provider: str, confidence: float = 0.8, + metadata: Optional[Dict[str, Any]] = None) -> None: + """Helper method to add an attribute to the result.""" + self.attributes.append(StandardAttribute( + target_node=target_node, + name=name, + value=value, + type=attr_type, + provider=provider, + confidence=confidence, + metadata=metadata or {} + )) + + def add_relationship(self, source_node: str, target_node: str, relationship_type: str, + provider: str, confidence: float = 0.8, + raw_data: Optional[Dict[str, Any]] = None) -> None: + """Helper method to add a relationship to the result.""" + self.relationships.append(Relationship( + source_node=source_node, + target_node=target_node, + relationship_type=relationship_type, + confidence=confidence, + provider=provider, + raw_data=raw_data or {} + )) + + def get_discovered_nodes(self) -> set: + """Get all unique node identifiers discovered in this result.""" + nodes = set() + + # Add nodes from relationships + for rel in self.relationships: + nodes.add(rel.source_node) + nodes.add(rel.target_node) + + # Add nodes from attributes + for attr in self.attributes: + nodes.add(attr.target_node) + + return nodes + + def get_relationship_count(self) -> int: + """Get the total number of relationships in this result.""" + return len(self.relationships) + + def get_attribute_count(self) -> int: + """Get the total number of attributes in this result.""" + return len(self.attributes) + + ##TODO + #def is_large_entity(self, threshold: int) -> bool: + # """Check if this result qualifies as a large entity based on relationship count.""" + # return self.get_relationship_count() > threshold \ No newline at end of file diff --git a/core/rate_limiter.py b/core/rate_limiter.py index 7fadff4..d5a11d6 100644 --- a/core/rate_limiter.py +++ b/core/rate_limiter.py @@ -1,7 +1,6 @@ # 
dnsrecon-reduced/core/rate_limiter.py import time -import redis class GlobalRateLimiter: def __init__(self, redis_client): diff --git a/core/scanner.py b/core/scanner.py index 6ce05f3..67de6d0 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -2,18 +2,20 @@ import threading import traceback -import time import os import importlib import redis +import time +import random # Imported for jitter from typing import List, Set, Dict, Any, Tuple, Optional -from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future +from concurrent.futures import ThreadPoolExecutor from collections import defaultdict from queue import PriorityQueue from datetime import datetime, timezone from core.graph_manager import GraphManager, NodeType from core.logger import get_forensic_logger, new_session +from core.provider_result import ProviderResult from utils.helpers import _is_valid_ip, _is_valid_domain from providers.base_provider import BaseProvider from core.rate_limiter import GlobalRateLimiter @@ -30,12 +32,11 @@ class ScanStatus: class Scanner: """ Main scanning orchestrator for DNSRecon passive reconnaissance. + UNIFIED: Combines comprehensive features with improved display formatting. """ def __init__(self, session_config=None): """Initialize scanner with session-specific configuration.""" - print("Initializing Scanner instance...") - try: # Use provided session config or create default if session_config is None: @@ -55,36 +56,41 @@ class Scanner: self.task_queue = PriorityQueue() self.target_retries = defaultdict(int) self.scan_failed_due_to_retries = False + self.initial_targets = set() - # **NEW**: Track currently processing tasks to prevent processing after stop + # Thread-safe processing tracking (from Document 1) self.currently_processing = set() self.processing_lock = threading.Lock() + # Display-friendly processing list (from Document 2) + self.currently_processing_display = [] # Scanning progress tracking self.total_indicators_found = 0 self.indicators_processed = 0 self.indicators_completed = 0 self.tasks_re_enqueued = 0 + self.tasks_skipped = 0 # BUGFIX: Initialize tasks_skipped self.total_tasks_ever_enqueued = 0 self.current_indicator = "" + self.last_task_from_queue = None # Concurrent processing configuration self.max_workers = self.config.max_concurrent_requests self.executor = None + # Status logger thread with improved formatting + self.status_logger_thread = None + self.status_logger_stop_event = threading.Event() + # Initialize providers with session config - print("Calling _initialize_providers with session config...") self._initialize_providers() # Initialize logger - print("Initializing forensic logger...") self.logger = get_forensic_logger() # Initialize global rate limiter self.rate_limiter = GlobalRateLimiter(redis.StrictRedis(db=0)) - print("Scanner initialization complete") - except Exception as e: print(f"ERROR: Scanner initialization failed: {e}") traceback.print_exc() @@ -95,17 +101,14 @@ class Scanner: Check if stop is requested using both local and Redis-based signals. This ensures reliable termination across process boundaries. 
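
Before the scanner changes below, a short illustration of how a provider is expected to populate the ProviderResult model introduced above; the DNS values and the relationship label are invented for the example.

from core.provider_result import ProviderResult

result = ProviderResult()
result.add_relationship(
    source_node='example.com',
    target_node='93.184.216.34',
    relationship_type='dns_a_record',   # hypothetical relationship label
    provider='dns',
    confidence=0.9,
)
result.add_attribute(
    target_node='example.com',
    name='dns_records',
    value='A: 93.184.216.34',
    attr_type='dns_record',
    provider='dns',
)

assert result.get_relationship_count() == 1
assert result.get_attribute_count() == 1
assert result.get_discovered_nodes() == {'example.com', '93.184.216.34'}
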
""" - # Check local threading event first (fastest) if self.stop_event.is_set(): return True - # Check Redis-based stop signal if session ID is available if self.session_id: try: from core.session_manager import session_manager return session_manager.is_stop_requested(self.session_id) except Exception as e: - print(f"Error checking Redis stop signal: {e}") # Fall back to local event return self.stop_event.is_set() @@ -115,22 +118,19 @@ class Scanner: """ Set stop signal both locally and in Redis. """ - # Set local event self.stop_event.set() - # Set Redis signal if session ID is available if self.session_id: try: from core.session_manager import session_manager session_manager.set_stop_signal(self.session_id) except Exception as e: - print(f"Error setting Redis stop signal: {e}") + pass def __getstate__(self): """Prepare object for pickling by excluding unpicklable attributes.""" state = self.__dict__.copy() - # Remove unpicklable threading objects unpicklable_attrs = [ 'stop_event', 'scan_thread', @@ -138,14 +138,15 @@ class Scanner: 'processing_lock', 'task_queue', 'rate_limiter', - 'logger' + 'logger', + 'status_logger_thread', + 'status_logger_stop_event' ] for attr in unpicklable_attrs: if attr in state: del state[attr] - # Handle providers separately to ensure they're picklable if 'providers' in state: for provider in state['providers']: if hasattr(provider, '_stop_event'): @@ -157,7 +158,6 @@ class Scanner: """Restore object after unpickling by reconstructing threading objects.""" self.__dict__.update(state) - # Reconstruct threading objects self.stop_event = threading.Event() self.scan_thread = None self.executor = None @@ -165,15 +165,18 @@ class Scanner: self.task_queue = PriorityQueue() self.rate_limiter = GlobalRateLimiter(redis.StrictRedis(db=0)) self.logger = get_forensic_logger() + self.status_logger_thread = None + self.status_logger_stop_event = threading.Event() if not hasattr(self, 'providers') or not self.providers: - print("Providers not found after loading session, re-initializing...") self._initialize_providers() if not hasattr(self, 'currently_processing'): self.currently_processing = set() - # Re-set stop events for providers + if not hasattr(self, 'currently_processing_display'): + self.currently_processing_display = [] + if hasattr(self, 'providers'): for provider in self.providers: if hasattr(provider, 'set_stop_event'): @@ -182,8 +185,6 @@ class Scanner: def _initialize_providers(self) -> None: """Initialize all available providers based on session configuration.""" self.providers = [] - print("Initializing providers with session config...") - provider_dir = os.path.join(os.path.dirname(__file__), '..', 'providers') for filename in os.listdir(provider_dir): if filename.endswith('_provider.py') and not filename.startswith('base'): @@ -201,96 +202,105 @@ class Scanner: if provider.is_available(): provider.set_stop_event(self.stop_event) self.providers.append(provider) - print(f"✓ {provider.get_display_name()} provider initialized successfully for session") - else: - print(f"✗ {provider.get_display_name()} provider is not available") except Exception as e: - print(f"✗ Failed to initialize provider from {filename}: {e}") traceback.print_exc() - print(f"Initialized {len(self.providers)} providers for session") + def _status_logger_thread(self): + """Periodically prints a clean, formatted scan status to the terminal.""" + HEADER = "\033[95m" + CYAN = "\033[96m" + GREEN = "\033[92m" + YELLOW = "\033[93m" + BLUE = "\033[94m" + ENDC = "\033[0m" + BOLD = "\033[1m" + + 
last_status_str = "" + while not self.status_logger_stop_event.is_set(): + try: + with self.processing_lock: + in_flight_tasks = list(self.currently_processing) + self.currently_processing_display = in_flight_tasks.copy() - def update_session_config(self, new_config) -> None: - """Update session configuration and reinitialize providers.""" - print("Updating session configuration...") - self.config = new_config - self.max_workers = self.config.max_concurrent_requests - self._initialize_providers() - print("Session configuration updated") + status_str = ( + f"{BOLD}{HEADER}Scan Status: {self.status.upper()}{ENDC} | " + f"{CYAN}Queued: {self.task_queue.qsize()}{ENDC} | " + f"{YELLOW}In-Flight: {len(in_flight_tasks)}{ENDC} | " + f"{GREEN}Completed: {self.indicators_completed}{ENDC} | " + f"Skipped: {self.tasks_skipped} | " + f"Rescheduled: {self.tasks_re_enqueued}" + ) + + if status_str != last_status_str: + print(f"\n{'-'*80}") + print(status_str) + if self.last_task_from_queue: + # Unpack the new time-based queue item + _, p, (pn, ti, d) = self.last_task_from_queue + print(f"{BLUE}Last task dequeued -> Prio:{p} | Provider:{pn} | Target:'{ti}' | Depth:{d}{ENDC}") + if in_flight_tasks: + print(f"{BOLD}{YELLOW}Currently Processing:{ENDC}") + display_tasks = [f" - {p}: {t}" for p, t in in_flight_tasks[:3]] + print("\n".join(display_tasks)) + if len(in_flight_tasks) > 3: + print(f" ... and {len(in_flight_tasks) - 3} more") + print(f"{'-'*80}") + last_status_str = status_str + except Exception: + pass + + time.sleep(2) def start_scan(self, target: str, max_depth: int = 2, clear_graph: bool = True, force_rescan_target: Optional[str] = None) -> bool: - """Start a new reconnaissance scan with proper cleanup of previous scans.""" - print(f"=== STARTING SCAN IN SCANNER {id(self)} ===") - print(f"Session ID: {self.session_id}") - print(f"Initial scanner status: {self.status}") - self.total_tasks_ever_enqueued = 0 - - # **IMPROVED**: More aggressive cleanup of previous scan + """ + Starts a new reconnaissance scan. + """ if self.scan_thread and self.scan_thread.is_alive(): - print("A previous scan thread is still alive. 
Forcing termination...") - - # Set stop signals immediately self._set_stop_signal() self.status = ScanStatus.STOPPED - - # Clear all processing state with self.processing_lock: self.currently_processing.clear() + self.currently_processing_display = [] self.task_queue = PriorityQueue() - - # Shutdown executor aggressively if self.executor: - print("Shutting down executor forcefully...") self.executor.shutdown(wait=False, cancel_futures=True) self.executor = None - - # Wait for thread termination with shorter timeout - print("Waiting for previous scan thread to terminate...") - self.scan_thread.join(5.0) # Reduced from 10 seconds - - if self.scan_thread.is_alive(): - print("WARNING: Previous scan thread is still alive after 5 seconds") - # Continue anyway, but log the issue - self.logger.logger.warning("Previous scan thread failed to terminate cleanly") + self.scan_thread.join(5.0) - # Reset state for new scan with proper forensic logging - print("Resetting scanner state for new scan...") self.status = ScanStatus.IDLE self.stop_event.clear() - # **NEW**: Clear Redis stop signal explicitly if self.session_id: from core.session_manager import session_manager session_manager.clear_stop_signal(self.session_id) with self.processing_lock: self.currently_processing.clear() + self.currently_processing_display = [] self.task_queue = PriorityQueue() self.target_retries.clear() self.scan_failed_due_to_retries = False + self.tasks_skipped = 0 + self.last_task_from_queue = None - # Update session state immediately for GUI feedback self._update_session_state() - print("Scanner state reset complete.") try: if not hasattr(self, 'providers') or not self.providers: - print(f"ERROR: No providers available in scanner {id(self)}, cannot start scan") return False - print(f"Scanner {id(self)} validation passed, providers available: {[p.get_name() for p in self.providers]}") - if clear_graph: self.graph.clear() + self.initial_targets.clear() if force_rescan_target and self.graph.graph.has_node(force_rescan_target): - print(f"Forcing rescan of {force_rescan_target}, clearing provider states.") node_data = self.graph.graph.nodes[force_rescan_target] if 'metadata' in node_data and 'provider_states' in node_data['metadata']: node_data['metadata']['provider_states'] = {} self.current_target = target.lower().strip() + self.initial_targets.add(self.current_target) self.max_depth = max_depth self.current_depth = 0 @@ -298,17 +308,12 @@ class Scanner: self.indicators_processed = 0 self.indicators_completed = 0 self.tasks_re_enqueued = 0 + self.total_tasks_ever_enqueued = 0 self.current_indicator = self.current_target - # Update GUI with scan preparation state self._update_session_state() - - # Start new forensic session - print(f"Starting new forensic session for scanner {id(self)}...") self.logger = new_session() - # Start scan in a separate thread - print(f"Starting scan thread for scanner {id(self)}...") self.scan_thread = threading.Thread( target=self._execute_scan, args=(self.current_target, max_depth), @@ -316,11 +321,13 @@ class Scanner: ) self.scan_thread.start() - print(f"=== SCAN STARTED SUCCESSFULLY IN SCANNER {id(self)} ===") + self.status_logger_stop_event.clear() + self.status_logger_thread = threading.Thread(target=self._status_logger_thread, daemon=True) + self.status_logger_thread.start() + return True except Exception as e: - print(f"ERROR: Exception in start_scan for scanner {id(self)}: {e}") traceback.print_exc() self.status = ScanStatus.FAILED self._update_session_state() @@ -336,17 +343,20 @@ class 
Scanner: return 3 # Lowest priority def _execute_scan(self, target: str, max_depth: int) -> None: - """Execute the reconnaissance scan with proper termination handling.""" - print(f"_execute_scan started for {target} with depth {max_depth}") + """ + Execute the reconnaissance scan with a time-based, robust scheduler. + Handles rate-limiting via deferral and failures via exponential backoff. + """ self.executor = ThreadPoolExecutor(max_workers=self.max_workers) processed_tasks = set() - # Initial task population for the main target is_ip = _is_valid_ip(target) initial_providers = self._get_eligible_providers(target, is_ip, False) for provider in initial_providers: provider_name = provider.get_name() - self.task_queue.put((self._get_priority(provider_name), (provider_name, target, 0))) + priority = self._get_priority(provider_name) + # OVERHAUL: Enqueue with current timestamp to run immediately + self.task_queue.put((time.time(), priority, (provider_name, target, 0))) self.total_tasks_ever_enqueued += 1 try: @@ -356,64 +366,73 @@ class Scanner: enabled_providers = [provider.get_name() for provider in self.providers] self.logger.log_scan_start(target, max_depth, enabled_providers) - # Determine initial node type node_type = NodeType.IP if is_ip else NodeType.DOMAIN self.graph.add_node(target, node_type) - self._initialize_provider_states(target) - # Better termination checking in main loop - while not self.task_queue.empty() and not self._is_stop_requested(): + while not self._is_stop_requested(): + if self.task_queue.empty() and not self.currently_processing: + break # Scan is complete + try: - priority, (provider_name, target_item, depth) = self.task_queue.get() + # OVERHAUL: Peek at the next task to see if it's ready to run + next_run_at, _, _ = self.task_queue.queue[0] + if next_run_at > time.time(): + time.sleep(0.1) # Sleep to prevent busy-waiting for future tasks + continue + + # Task is ready, so get it from the queue + run_at, priority, (provider_name, target_item, depth) = self.task_queue.get() + self.last_task_from_queue = (run_at, priority, (provider_name, target_item, depth)) + except IndexError: - # Queue became empty during processing - break + time.sleep(0.1) # Queue is empty, but tasks might still be processing + continue task_tuple = (provider_name, target_item) if task_tuple in processed_tasks: + self.tasks_skipped += 1 + self.indicators_completed +=1 continue if depth > max_depth: continue - + + # OVERHAUL: Handle rate limiting with time-based deferral if self.rate_limiter.is_rate_limited(provider_name, self.config.get_rate_limit(provider_name), 60): - self.task_queue.put((priority + 1, (provider_name, target_item, depth))) # Postpone + defer_until = time.time() + 60 # Defer for 60 seconds + self.task_queue.put((defer_until, priority, (provider_name, target_item, depth))) + self.tasks_re_enqueued += 1 continue with self.processing_lock: - if self._is_stop_requested(): - print(f"Stop requested before processing {target_item}") - break - self.currently_processing.add(target_item) + if self._is_stop_requested(): break + self.currently_processing.add(task_tuple) try: self.current_depth = depth self.current_indicator = target_item self._update_session_state() - if self._is_stop_requested(): - print(f"Stop requested during processing setup for {target_item}") - break + if self._is_stop_requested(): break provider = next((p for p in self.providers if p.get_name() == provider_name), None) if provider: - new_targets, large_entity_members, success = 
self._query_single_provider_for_target(provider, target_item, depth) + new_targets, _, success = self._query_single_provider_for_target(provider, target_item, depth) - if self._is_stop_requested(): - print(f"Stop requested after querying providers for {target_item}") - break + if self._is_stop_requested(): break if not success: self.target_retries[task_tuple] += 1 if self.target_retries[task_tuple] <= self.config.max_retries_per_target: - print(f"Re-queueing task {task_tuple} (attempt {self.target_retries[task_tuple]})") - self.task_queue.put((priority, (provider_name, target_item, depth))) + # OVERHAUL: Exponential backoff for retries + retry_count = self.target_retries[task_tuple] + backoff_delay = (2 ** retry_count) + random.uniform(0, 1) # Add jitter + retry_at = time.time() + backoff_delay + self.task_queue.put((retry_at, priority, (provider_name, target_item, depth))) self.tasks_re_enqueued += 1 - self.total_tasks_ever_enqueued += 1 else: - print(f"ERROR: Max retries exceeded for task {task_tuple}") self.scan_failed_due_to_retries = True self._log_target_processing_error(str(task_tuple), "Max retries exceeded") else: @@ -421,35 +440,29 @@ class Scanner: self.indicators_completed += 1 if not self._is_stop_requested(): - all_new_targets = new_targets.union(large_entity_members) - for new_target in all_new_targets: + for new_target in new_targets: is_ip_new = _is_valid_ip(new_target) eligible_providers_new = self._get_eligible_providers(new_target, is_ip_new, False) for p_new in eligible_providers_new: p_name_new = p_new.get_name() if (p_name_new, new_target) not in processed_tasks: new_depth = depth + 1 if new_target in new_targets else depth - self.task_queue.put((self._get_priority(p_name_new), (p_name_new, new_target, new_depth))) + new_priority = self._get_priority(p_name_new) + # OVERHAUL: Enqueue new tasks to run immediately + self.task_queue.put((time.time(), new_priority, (p_name_new, new_target, new_depth))) self.total_tasks_ever_enqueued += 1 finally: with self.processing_lock: - self.currently_processing.discard(target_item) - - if self._is_stop_requested(): - print("Scan terminated due to stop request") - self.logger.logger.info("Scan terminated by user request") - elif self.task_queue.empty(): - print("Scan completed - no more targets to process") - self.logger.logger.info("Scan completed - all targets processed") + self.currently_processing.discard(task_tuple) except Exception as e: - print(f"ERROR: Scan execution failed with error: {e}") traceback.print_exc() self.status = ScanStatus.FAILED self.logger.logger.error(f"Scan failed: {e}") finally: with self.processing_lock: self.currently_processing.clear() + self.currently_processing_display = [] if self._is_stop_requested(): self.status = ScanStatus.STOPPED @@ -458,122 +471,312 @@ class Scanner: else: self.status = ScanStatus.COMPLETED + self.status_logger_stop_event.set() + if self.status_logger_thread: + self.status_logger_thread.join() + self._update_session_state() self.logger.log_scan_complete() if self.executor: self.executor.shutdown(wait=False, cancel_futures=True) self.executor = None - stats = self.graph.get_statistics() - print("Final scan statistics:") - print(f" - Total nodes: {stats['basic_metrics']['total_nodes']}") - print(f" - Total edges: {stats['basic_metrics']['total_edges']}") - print(f" - Tasks processed: {len(processed_tasks)}") def _query_single_provider_for_target(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]: + """ + Query a single provider and process 
the unified ProviderResult. + """ if self._is_stop_requested(): - print(f"Stop requested before querying {provider.get_name()} for {target}") return set(), set(), False is_ip = _is_valid_ip(target) target_type = NodeType.IP if is_ip else NodeType.DOMAIN - print(f"Querying {provider.get_name()} for {target_type.value}: {target} at depth {depth}") self.graph.add_node(target, target_type) self._initialize_provider_states(target) new_targets = set() large_entity_members = set() - node_attributes = defaultdict(lambda: defaultdict(list)) provider_successful = True try: - provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth) - if provider_results is None: + provider_result = self._query_single_provider_unified(provider, target, is_ip, depth) + + if provider_result is None: provider_successful = False elif not self._is_stop_requested(): - discovered, is_large_entity = self._process_provider_results( - target, provider, provider_results, node_attributes, depth + discovered, is_large_entity = self._process_provider_result_unified( + target, provider, provider_result, depth ) if is_large_entity: large_entity_members.update(discovered) else: new_targets.update(discovered) - else: - print(f"Stop requested after processing results from {provider.get_name()}") + self.graph.process_correlations_for_node(target) except Exception as e: provider_successful = False self._log_provider_error(target, provider.get_name(), str(e)) - if not self._is_stop_requested(): - for node_id, attributes in node_attributes.items(): - if self.graph.graph.has_node(node_id): - node_is_ip = _is_valid_ip(node_id) - node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN - self.graph.add_node(node_id, node_type_to_add, attributes=attributes) - return new_targets, large_entity_members, provider_successful + def _query_single_provider_unified(self, provider: BaseProvider, target: str, is_ip: bool, current_depth: int) -> Optional[ProviderResult]: + """ + Query a single provider with stop signal checking. + """ + provider_name = provider.get_name() + start_time = datetime.now(timezone.utc) + + if self._is_stop_requested(): + return None + + try: + if is_ip: + result = provider.query_ip(target) + else: + result = provider.query_domain(target) + + if self._is_stop_requested(): + return None + + relationship_count = result.get_relationship_count() if result else 0 + self._update_provider_state(target, provider_name, 'success', relationship_count, None, start_time) + + return result + + except Exception as e: + self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time) + return None + + def _process_provider_result_unified(self, target: str, provider: BaseProvider, + provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]: + """ + Process a unified ProviderResult object to update the graph. + VERIFIED: Proper ISP and CA node type assignment. 
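
The time-based scheduler in _execute_scan above expresses both rate-limit deferral and retry backoff purely through the run-at timestamp stored in the priority queue. A condensed sketch of that pattern; the 60-second defer window and the backoff formula mirror the scanner code above.

import random
import time
from queue import PriorityQueue

# Items are (run_at, priority, (provider_name, target, depth)); the heap orders
# primarily by run_at, so deferred work naturally sinks behind ready work.
task_queue = PriorityQueue()

def enqueue_now(priority, task):
    task_queue.put((time.time(), priority, task))

def defer_for_rate_limit(priority, task, seconds=60):
    # Provider is rate limited: re-enqueue with a run_at one window in the future.
    task_queue.put((time.time() + seconds, priority, task))

def retry_with_backoff(priority, task, retry_count):
    # Failed task: exponential backoff plus jitter before it becomes runnable again.
    task_queue.put((time.time() + (2 ** retry_count) + random.uniform(0, 1), priority, task))

def pop_ready_task():
    # Peek at the heap root; only dequeue work whose run_at has already passed.
    if task_queue.empty():
        return None
    next_run_at, _, _ = task_queue.queue[0]
    if next_run_at > time.time():
        return None   # nothing runnable yet; the caller sleeps briefly and retries
    return task_queue.get()
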
+ """ + provider_name = provider.get_name() + discovered_targets = set() + + if self._is_stop_requested(): + return discovered_targets, False + + # Process all attributes first, grouping by target node + attributes_by_node = defaultdict(list) + for attribute in provider_result.attributes: + attr_dict = { + "name": attribute.name, + "value": attribute.value, + "type": attribute.type, + "provider": attribute.provider, + "confidence": attribute.confidence, + "metadata": attribute.metadata + } + attributes_by_node[attribute.target_node].append(attr_dict) + + # Add attributes to existing nodes (important for ISP nodes to get ASN attributes) + for node_id, node_attributes_list in attributes_by_node.items(): + if self.graph.graph.has_node(node_id): + # Node already exists, just add attributes + if _is_valid_ip(node_id): + node_type = NodeType.IP + else: + node_type = NodeType.DOMAIN + + self.graph.add_node(node_id, node_type, attributes=node_attributes_list) + + # Check if this should be a large entity + if provider_result.get_relationship_count() > self.config.large_entity_threshold: + members = self._create_large_entity_from_provider_result(target, provider_name, provider_result, current_depth) + return members, True + + # Process relationships and create nodes with proper types + for i, relationship in enumerate(provider_result.relationships): + if i % 5 == 0 and self._is_stop_requested(): + break + + source_node = relationship.source_node + target_node = relationship.target_node + + # VERIFIED: Determine source node type + source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN + + # VERIFIED: Determine target node type based on provider and relationship + if provider_name == 'shodan' and relationship.relationship_type == 'shodan_isp': + target_type = NodeType.ISP # ISP node for Shodan organization data + elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer': + target_type = NodeType.CA # CA node for certificate issuers + elif _is_valid_ip(target_node): + target_type = NodeType.IP + else: + target_type = NodeType.DOMAIN + + # Create or update nodes with proper types + self.graph.add_node(source_node, source_type) + self.graph.add_node(target_node, target_type) + + # Add the relationship edge + if self.graph.add_edge( + source_node, target_node, + relationship.relationship_type, + relationship.confidence, + provider_name, + relationship.raw_data + ): + pass # Edge was successfully added + + # Add target to discovered nodes for further processing + if _is_valid_domain(target_node) or _is_valid_ip(target_node): + discovered_targets.add(target_node) + + return discovered_targets, False + + def _create_large_entity_from_provider_result(self, source: str, provider_name: str, + provider_result: ProviderResult, current_depth: int) -> Set[str]: + """ + Create a large entity node from a ProviderResult. 
+ """ + entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}" + + targets = [rel.target_node for rel in provider_result.relationships] + node_type = 'unknown' + + if targets: + if _is_valid_domain(targets[0]): + node_type = 'domain' + elif _is_valid_ip(targets[0]): + node_type = 'ip' + + for target in targets: + target_node_type = NodeType.DOMAIN if node_type == 'domain' else NodeType.IP + self.graph.add_node(target, target_node_type) + + attributes_dict = { + 'count': len(targets), + 'nodes': targets, + 'node_type': node_type, + 'source_provider': provider_name, + 'discovery_depth': current_depth, + 'threshold_exceeded': self.config.large_entity_threshold, + } + + attributes_list = [] + for key, value in attributes_dict.items(): + attributes_list.append({ + "name": key, + "value": value, + "type": "large_entity_info", + "provider": provider_name, + "confidence": 0.9, + "metadata": {} + }) + + description = f'Large entity created due to {len(targets)} relationships from {provider_name}' + + self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes_list, description=description) + + if provider_result.relationships: + rel_type = provider_result.relationships[0].relationship_type + self.graph.add_edge(source, entity_id, rel_type, 0.9, provider_name, + {'large_entity_info': f'Contains {len(targets)} {node_type}s'}) + + self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}") + + return set(targets) + def stop_scan(self) -> bool: """Request immediate scan termination with proper cleanup.""" try: - print("=== INITIATING IMMEDIATE SCAN TERMINATION ===") self.logger.logger.info("Scan termination requested by user") - - # **IMPROVED**: More aggressive stop signal setting self._set_stop_signal() self.status = ScanStatus.STOPPED - # **NEW**: Clear processing state immediately with self.processing_lock: - currently_processing_copy = self.currently_processing.copy() self.currently_processing.clear() - print(f"Cleared {len(currently_processing_copy)} currently processing targets: {currently_processing_copy}") + self.currently_processing_display = [] - # **IMPROVED**: Clear task queue and log what was discarded - discarded_tasks = [] - while not self.task_queue.empty(): - discarded_tasks.append(self.task_queue.get()) self.task_queue = PriorityQueue() - print(f"Discarded {len(discarded_tasks)} pending tasks") - # **IMPROVED**: Aggressively shut down executor if self.executor: - print("Shutting down executor with immediate cancellation...") try: - # Cancel all pending futures self.executor.shutdown(wait=False, cancel_futures=True) - print("Executor shutdown completed") - except Exception as e: - print(f"Error during executor shutdown: {e}") + except Exception: + pass - # Immediately update GUI with stopped status self._update_session_state() - - print("Termination signals sent. The scan will stop as soon as possible.") return True except Exception as e: - print(f"ERROR: Exception in stop_scan: {e}") self.logger.logger.error(f"Error during scan termination: {e}") traceback.print_exc() return False + def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool: + """ + Extracts a node from a large entity and re-queues it for scanning. 
+ """ + if not self.graph.graph.has_node(large_entity_id): + return False + + predecessors = list(self.graph.graph.predecessors(large_entity_id)) + if not predecessors: + return False + source_node_id = predecessors[0] + + original_edge_data = self.graph.graph.get_edge_data(source_node_id, large_entity_id) + if not original_edge_data: + return False + + success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract) + if not success: + return False + + self.graph.add_edge( + source_id=source_node_id, + target_id=node_id_to_extract, + relationship_type=original_edge_data.get('relationship_type', 'extracted_from_large_entity'), + confidence_score=original_edge_data.get('confidence_score', 0.85), + source_provider=original_edge_data.get('source_provider', 'unknown'), + raw_data={'context': f'Extracted from large entity {large_entity_id}'} + ) + + is_ip = _is_valid_ip(node_id_to_extract) + + large_entity_attributes = self.graph.graph.nodes[large_entity_id].get('attributes', []) + discovery_depth_attr = next((attr for attr in large_entity_attributes if attr.get('name') == 'discovery_depth'), None) + current_depth = discovery_depth_attr['value'] if discovery_depth_attr else 0 + + eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False) + for provider in eligible_providers: + provider_name = provider.get_name() + priority = self._get_priority(provider_name) + self.task_queue.put((time.time(), priority, (provider_name, node_id_to_extract, current_depth))) + self.total_tasks_ever_enqueued += 1 + + if self.status != ScanStatus.RUNNING: + self.status = ScanStatus.RUNNING + self._update_session_state() + + if not self.scan_thread or not self.scan_thread.is_alive(): + self.scan_thread = threading.Thread( + target=self._execute_scan, + args=(self.current_target, self.max_depth), + daemon=True + ) + self.scan_thread.start() + + return True + def _update_session_state(self) -> None: """ Update the scanner state in Redis for GUI updates. - This ensures the web interface sees real-time updates. 
""" if self.session_id: try: from core.session_manager import session_manager - success = session_manager.update_session_scanner(self.session_id, self) - if not success: - print(f"WARNING: Failed to update session state for {self.session_id}") - except Exception as e: - print(f"ERROR: Failed to update session state: {e}") + session_manager.update_session_scanner(self.session_id, self) + except Exception: + pass def get_scan_status(self) -> Dict[str, Any]: - """Get current scan status with processing information.""" + """Get current scan status with comprehensive processing information.""" try: with self.processing_lock: currently_processing_count = len(self.currently_processing) @@ -594,113 +797,50 @@ class Scanner: 'graph_statistics': self.graph.get_statistics(), 'task_queue_size': self.task_queue.qsize(), 'currently_processing_count': currently_processing_count, - 'currently_processing': currently_processing_list[:5] + 'currently_processing': currently_processing_list[:5], + 'tasks_in_queue': self.task_queue.qsize(), + 'tasks_completed': self.indicators_completed, + 'tasks_skipped': self.tasks_skipped, + 'tasks_rescheduled': self.tasks_re_enqueued, } - except Exception as e: - print(f"ERROR: Exception in get_scan_status: {e}") + except Exception: traceback.print_exc() - return { - 'status': 'error', - 'target_domain': None, - 'current_depth': 0, - 'max_depth': 0, - 'current_indicator': '', - 'indicators_processed': 0, - 'indicators_completed': 0, - 'tasks_re_enqueued': 0, - 'progress_percentage': 0.0, - 'enabled_providers': [], - 'graph_statistics': {}, - 'task_queue_size': 0, - 'currently_processing_count': 0, - 'currently_processing': [] - } + return { 'status': 'error', 'message': 'Failed to get status' } def _initialize_provider_states(self, target: str) -> None: """Initialize provider states for forensic tracking.""" - if not self.graph.graph.has_node(target): - return - + if not self.graph.graph.has_node(target): return node_data = self.graph.graph.nodes[target] - if 'metadata' not in node_data: - node_data['metadata'] = {} - if 'provider_states' not in node_data['metadata']: - node_data['metadata']['provider_states'] = {} + if 'metadata' not in node_data: node_data['metadata'] = {} + if 'provider_states' not in node_data['metadata']: node_data['metadata']['provider_states'] = {} def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool) -> List: """Get providers eligible for querying this target.""" if dns_only: return [p for p in self.providers if p.get_name() == 'dns'] - eligible = [] target_key = 'ips' if is_ip else 'domains' - for provider in self.providers: if provider.get_eligibility().get(target_key): if not self._already_queried_provider(target, provider.get_name()): eligible.append(provider) - else: - print(f"Skipping {provider.get_name()} for {target} - already queried") - return eligible def _already_queried_provider(self, target: str, provider_name: str) -> bool: """Check if we already successfully queried a provider for a target.""" - if not self.graph.graph.has_node(target): - return False - + if not self.graph.graph.has_node(target): return False node_data = self.graph.graph.nodes[target] provider_states = node_data.get('metadata', {}).get('provider_states', {}) - - # A provider has been successfully queried if a state exists and its status is 'success' provider_state = provider_states.get(provider_name) return provider_state is not None and provider_state.get('status') == 'success' - def _query_single_provider_forensic(self, provider, target: str, 
is_ip: bool, current_depth: int) -> Optional[List]: - """Query a single provider with stop signal checking.""" - provider_name = provider.get_name() - start_time = datetime.now(timezone.utc) - - if self._is_stop_requested(): - print(f"Stop requested before querying {provider_name} for {target}") - return None - - print(f"Querying {provider_name} for {target}") - - self.logger.logger.info(f"Attempting {provider_name} query for {target} at depth {current_depth}") - - try: - if is_ip: - results = provider.query_ip(target) - else: - results = provider.query_domain(target) - - if self._is_stop_requested(): - print(f"Stop requested after querying {provider_name} for {target}") - return None - - self._update_provider_state(target, provider_name, 'success', len(results), None, start_time) - - print(f"✓ {provider_name} returned {len(results)} results for {target}") - return results - - except Exception as e: - self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time) - print(f"✗ {provider_name} failed for {target}: {e}") - return None - def _update_provider_state(self, target: str, provider_name: str, status: str, results_count: int, error: Optional[str], start_time: datetime) -> None: """Update provider state in node metadata for forensic tracking.""" - if not self.graph.graph.has_node(target): - return - + if not self.graph.graph.has_node(target): return node_data = self.graph.graph.nodes[target] - if 'metadata' not in node_data: - node_data['metadata'] = {} - if 'provider_states' not in node_data['metadata']: - node_data['metadata']['provider_states'] = {} - + if 'metadata' not in node_data: node_data['metadata'] = {} + if 'provider_states' not in node_data['metadata']: node_data['metadata']['provider_states'] = {} node_data['metadata']['provider_states'][provider_name] = { 'status': status, 'timestamp': start_time.isoformat(), @@ -709,295 +849,41 @@ class Scanner: 'duration_ms': (datetime.now(timezone.utc) - start_time).total_seconds() * 1000 } - self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)") - - def _process_provider_results(self, target: str, provider, results: List, - node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]: - """Process provider results, returns (discovered_targets, is_large_entity).""" - provider_name = provider.get_name() - discovered_targets = set() - - if self._is_stop_requested(): - print(f"Stop requested before processing results from {provider_name} for {target}") - return discovered_targets, False - - if len(results) > self.config.large_entity_threshold: - print(f"Large entity detected: {provider_name} returned {len(results)} results for {target}") - members = self._create_large_entity(target, provider_name, results, current_depth) - return members, True - - for i, (source, rel_target, rel_type, confidence, raw_data) in enumerate(results): - if i % 5 == 0 and self._is_stop_requested(): # Check more frequently - print(f"Stop requested while processing results from {provider_name} for {target}") - break - - self.logger.log_relationship_discovery( - source_node=source, - target_node=rel_target, - relationship_type=rel_type, - confidence_score=confidence, - provider=provider_name, - raw_data=raw_data, - discovery_method=f"{provider_name}_query_depth_{current_depth}" - ) - - # Collect attributes for the source node - self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source]) - - # If the relationship is 
asn_membership, collect attributes for the target ASN node - if rel_type == 'asn_membership': - self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target]) - - - if isinstance(rel_target, list): - # If the target is a list, iterate and process each item - for single_target in rel_target: - if _is_valid_ip(single_target): - self.graph.add_node(single_target, NodeType.IP) - if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data): - print(f"Added IP relationship: {source} -> {single_target} ({rel_type})") - discovered_targets.add(single_target) - elif _is_valid_domain(single_target): - self.graph.add_node(single_target, NodeType.DOMAIN) - if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data): - print(f"Added domain relationship: {source} -> {single_target} ({rel_type})") - discovered_targets.add(single_target) - self._collect_node_attributes(single_target, provider_name, rel_type, source, raw_data, node_attributes[single_target]) - - elif _is_valid_ip(rel_target): - self.graph.add_node(rel_target, NodeType.IP) - if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data): - print(f"Added IP relationship: {source} -> {rel_target} ({rel_type})") - discovered_targets.add(rel_target) - - elif rel_target.startswith('AS') and rel_target[2:].isdigit(): - self.graph.add_node(rel_target, NodeType.ASN) - if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data): - print(f"Added ASN relationship: {source} -> {rel_target} ({rel_type})") - - elif _is_valid_domain(rel_target): - self.graph.add_node(rel_target, NodeType.DOMAIN) - if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data): - print(f"Added domain relationship: {source} -> {rel_target} ({rel_type})") - discovered_targets.add(rel_target) - self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target]) - - else: - self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source]) - - return discovered_targets, False - - def _create_large_entity(self, source: str, provider_name: str, results: List, current_depth: int) -> Set[str]: - """Create a large entity node and returns the members for DNS processing.""" - entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}" - - targets = [rel[1] for rel in results if len(rel) > 1] - node_type = 'unknown' - - if targets: - if _is_valid_domain(targets[0]): - node_type = 'domain' - elif _is_valid_ip(targets[0]): - node_type = 'ip' - - # We still create the nodes so they exist in the graph, they are just not processed for edges yet. 
- for target in targets: - self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP) - - attributes = { - 'count': len(targets), - 'nodes': targets, - 'node_type': node_type, - 'source_provider': provider_name, - 'discovery_depth': current_depth, - 'threshold_exceeded': self.config.large_entity_threshold, - } - description = f'Large entity created due to {len(targets)} results from {provider_name}' - - self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes, description=description) - - if results: - rel_type = results[0][2] - self.graph.add_edge(source, entity_id, rel_type, 0.9, provider_name, - {'large_entity_info': f'Contains {len(targets)} {node_type}s'}) - - self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}") - print(f"Created large entity {entity_id} for {len(targets)} {node_type}s from {provider_name}") - - return set(targets) - - def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool: - """ - Extracts a node from a large entity, re-creates its original edge, and - re-queues it for full scanning. - """ - if not self.graph.graph.has_node(large_entity_id): - print(f"ERROR: Large entity {large_entity_id} not found.") - return False - - # 1. Get the original source node that discovered the large entity - predecessors = list(self.graph.graph.predecessors(large_entity_id)) - if not predecessors: - print(f"ERROR: No source node found for large entity {large_entity_id}.") - return False - source_node_id = predecessors[0] - - # Get the original edge data to replicate it for the extracted node - original_edge_data = self.graph.graph.get_edge_data(source_node_id, large_entity_id) - if not original_edge_data: - print(f"ERROR: Could not find original edge data from {source_node_id} to {large_entity_id}.") - return False - - # 2. Modify the graph data structure first - success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract) - if not success: - print(f"ERROR: Node {node_id_to_extract} could not be removed from {large_entity_id}'s attributes.") - return False - - # 3. Create the direct edge from the original source to the newly extracted node - print(f"Re-creating direct edge from {source_node_id} to extracted node {node_id_to_extract}") - self.graph.add_edge( - source_id=source_node_id, - target_id=node_id_to_extract, - relationship_type=original_edge_data.get('relationship_type', 'extracted_from_large_entity'), - confidence_score=original_edge_data.get('confidence_score', 0.85), # Slightly lower confidence - source_provider=original_edge_data.get('source_provider', 'unknown'), - raw_data={'context': f'Extracted from large entity {large_entity_id}'} - ) - - # 4. Re-queue the extracted node for full processing by all eligible providers - print(f"Re-queueing extracted node {node_id_to_extract} for full reconnaissance...") - is_ip = _is_valid_ip(node_id_to_extract) - current_depth = self.graph.graph.nodes[large_entity_id].get('attributes', {}).get('discovery_depth', 0) - - eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False) - for provider in eligible_providers: - provider_name = provider.get_name() - self.task_queue.put((self._get_priority(provider_name), (provider_name, node_id_to_extract, current_depth))) - self.total_tasks_ever_enqueued += 1 - - # 5. If the scanner is not running, we need to kickstart it to process this one item. 
- if self.status != ScanStatus.RUNNING: - print("Scanner is idle. Starting a mini-scan to process the extracted node.") - self.status = ScanStatus.RUNNING - self._update_session_state() - - if not self.scan_thread or not self.scan_thread.is_alive(): - self.scan_thread = threading.Thread( - target=self._execute_scan, - args=(self.current_target, self.max_depth), - daemon=True - ) - self.scan_thread.start() - - print(f"Successfully extracted and re-queued {node_id_to_extract} from {large_entity_id}.") - return True - - def _collect_node_attributes(self, node_id: str, provider_name: str, rel_type: str, - target: str, raw_data: Dict[str, Any], attributes: Dict[str, Any]) -> None: - """Collect and organize attributes for a node.""" - self.logger.logger.debug(f"Collecting attributes for {node_id} from {provider_name}: {rel_type}") - - if provider_name == 'dns': - record_type = raw_data.get('query_type', 'UNKNOWN') - value = raw_data.get('value', target) - dns_entry = f"{record_type}: {value}" - if dns_entry not in attributes.get('dns_records', []): - attributes.setdefault('dns_records', []).append(dns_entry) - - elif provider_name == 'crtsh': - if rel_type == "san_certificate": - domain_certs = raw_data.get('domain_certificates', {}) - if node_id in domain_certs: - cert_summary = domain_certs[node_id] - attributes['certificates'] = cert_summary - if target not in attributes.get('related_domains_san', []): - attributes.setdefault('related_domains_san', []).append(target) - - elif provider_name == 'shodan': - # This logic will now apply to the correct node (ASN or IP) - shodan_attributes = attributes.setdefault('shodan', {}) - for key, value in raw_data.items(): - if key not in shodan_attributes or not shodan_attributes.get(key): - shodan_attributes[key] = value - - if _is_valid_ip(node_id): - if 'ports' in raw_data: - attributes['ports'] = raw_data['ports'] - if 'os' in raw_data and raw_data['os']: - attributes['os'] = raw_data['os'] - - if rel_type == "asn_membership": - # This is the key change: these attributes are for the target (the ASN), - # not the source (the IP). We will add them to the ASN node later. 
- pass - - record_type_name = rel_type - if record_type_name not in attributes: - attributes[record_type_name] = [] - - if isinstance(target, list): - attributes[record_type_name].extend(target) - else: - if target not in attributes[record_type_name]: - attributes[record_type_name].append(target) - def _log_target_processing_error(self, target: str, error: str) -> None: - """Log target processing errors for forensic trail.""" self.logger.logger.error(f"Target processing failed for {target}: {error}") def _log_provider_error(self, target: str, provider_name: str, error: str) -> None: - """Log provider query errors for forensic trail.""" self.logger.logger.error(f"Provider {provider_name} failed for {target}: {error}") - def _log_no_eligible_providers(self, target: str, is_ip: bool) -> None: - """Log when no providers are eligible for a target.""" - target_type = 'IP' if is_ip else 'domain' - self.logger.logger.warning(f"No eligible providers for {target_type}: {target}") - def _calculate_progress(self) -> float: - """Calculate scan progress percentage based on task completion.""" - if self.total_tasks_ever_enqueued == 0: - return 0.0 + if self.total_tasks_ever_enqueued == 0: return 0.0 return min(100.0, (self.indicators_completed / self.total_tasks_ever_enqueued) * 100) def get_graph_data(self) -> Dict[str, Any]: - """Get current graph data for visualization.""" - return self.graph.get_graph_data() + graph_data = self.graph.get_graph_data() + graph_data['initial_targets'] = list(self.initial_targets) + return graph_data def export_results(self) -> Dict[str, Any]: - """Export complete scan results with forensic audit trail.""" graph_data = self.graph.export_json() audit_trail = self.logger.export_audit_trail() provider_stats = {} for provider in self.providers: provider_stats[provider.get_name()] = provider.get_statistics() - export_data = { + return { 'scan_metadata': { - 'target_domain': self.current_target, - 'max_depth': self.max_depth, - 'final_status': self.status, - 'total_indicators_processed': self.indicators_processed, - 'enabled_providers': list(provider_stats.keys()), - 'session_id': self.session_id + 'target_domain': self.current_target, 'max_depth': self.max_depth, + 'final_status': self.status, 'total_indicators_processed': self.indicators_processed, + 'enabled_providers': list(provider_stats.keys()), 'session_id': self.session_id }, 'graph_data': graph_data, 'forensic_audit': audit_trail, 'provider_statistics': provider_stats, 'scan_summary': self.logger.get_forensic_summary() } - return export_data - - def get_provider_statistics(self) -> Dict[str, Dict[str, Any]]: - """Get statistics for all providers with forensic information.""" - stats = {} - for provider in self.providers: - stats[provider.get_name()] = provider.get_statistics() - return stats def get_provider_info(self) -> Dict[str, Dict[str, Any]]: - """Get information about all available providers.""" info = {} provider_dir = os.path.join(os.path.dirname(__file__), '..', 'providers') for filename in os.listdir(provider_dir): @@ -1009,13 +895,9 @@ class Scanner: attribute = getattr(module, attribute_name) if isinstance(attribute, type) and issubclass(attribute, BaseProvider) and attribute is not BaseProvider: provider_class = attribute - # Instantiate to get metadata, even if not fully configured temp_provider = provider_class(name=attribute_name, session_config=self.config) provider_name = temp_provider.get_name() - - # Find the actual provider instance if it exists, to get live stats live_provider = next((p for p in 
self.providers if p.get_name() == provider_name), None) - info[provider_name] = { 'display_name': temp_provider.get_display_name(), 'requires_api_key': temp_provider.requires_api_key(), @@ -1023,7 +905,6 @@ class Scanner: 'enabled': self.config.is_provider_enabled(provider_name), 'rate_limit': self.config.get_rate_limit(provider_name), } - except Exception as e: - print(f"✗ Failed to get info for provider from {filename}: {e}") + except Exception: traceback.print_exc() return info \ No newline at end of file diff --git a/core/session_manager.py b/core/session_manager.py index 7631db9..a1d916c 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -5,18 +5,15 @@ import time import uuid import redis import pickle -from typing import Dict, Optional, Any, List +from typing import Dict, Optional, Any from core.scanner import Scanner from config import config -# WARNING: Using pickle can be a security risk if the data source is not trusted. -# In this case, we are only serializing/deserializing our own trusted Scanner objects, -# which is generally safe. Do not unpickle data from untrusted sources. - class SessionManager: """ - Manages multiple scanner instances for concurrent user sessions using Redis. + FIXED: Manages multiple scanner instances for concurrent user sessions using Redis. + Now more conservative about session creation to preserve API keys and configuration. """ def __init__(self, session_timeout_minutes: int = 0): @@ -28,7 +25,10 @@ class SessionManager: self.redis_client = redis.StrictRedis(db=0, decode_responses=False) self.session_timeout = session_timeout_minutes * 60 # Convert to seconds - self.lock = threading.Lock() # Lock for local operations, Redis handles atomic ops + self.lock = threading.Lock() + + # FIXED: Add a creation lock to prevent race conditions + self.creation_lock = threading.Lock() # Start cleanup thread self.cleanup_thread = threading.Thread(target=self._cleanup_loop, daemon=True) @@ -40,7 +40,7 @@ class SessionManager: """Prepare SessionManager for pickling.""" state = self.__dict__.copy() # Exclude unpickleable attributes - Redis client and threading objects - unpicklable_attrs = ['lock', 'cleanup_thread', 'redis_client'] + unpicklable_attrs = ['lock', 'cleanup_thread', 'redis_client', 'creation_lock'] for attr in unpicklable_attrs: if attr in state: del state[attr] @@ -50,9 +50,9 @@ class SessionManager: """Restore SessionManager after unpickling.""" self.__dict__.update(state) # Re-initialize unpickleable attributes - import redis self.redis_client = redis.StrictRedis(db=0, decode_responses=False) self.lock = threading.Lock() + self.creation_lock = threading.Lock() self.cleanup_thread = threading.Thread(target=self._cleanup_loop, daemon=True) self.cleanup_thread.start() @@ -66,44 +66,47 @@ class SessionManager: def create_session(self) -> str: """ - Create a new user session and store it in Redis. + FIXED: Create a new user session with thread-safe creation to prevent duplicates. 
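
Scanner and SessionManager follow the same pickling discipline for Redis storage: drop anything carrying OS-level state (locks, threads, the Redis client) in __getstate__ and rebuild it in __setstate__. A condensed sketch of the pattern on an illustrative class:

import threading
import redis

class RedisBackedComponent:
    UNPICKLABLE = ('lock', 'worker_thread', 'redis_client')

    def __init__(self):
        self.data = {}
        self.lock = threading.Lock()
        self.worker_thread = None
        self.redis_client = redis.StrictRedis(db=0, decode_responses=False)

    def __getstate__(self):
        state = self.__dict__.copy()
        for attr in self.UNPICKLABLE:
            state.pop(attr, None)     # locks, threads and sockets cannot be pickled
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.lock = threading.Lock()  # rebuild fresh runtime objects on load
        self.worker_thread = None
        self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
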
""" - session_id = str(uuid.uuid4()) - print(f"=== CREATING SESSION {session_id} IN REDIS ===") - - try: - from core.session_config import create_session_config - session_config = create_session_config() - scanner_instance = Scanner(session_config=session_config) + # FIXED: Use creation lock to prevent race conditions + with self.creation_lock: + session_id = str(uuid.uuid4()) + print(f"=== CREATING SESSION {session_id} IN REDIS ===") - # Set the session ID on the scanner for cross-process stop signal management - scanner_instance.session_id = session_id - - session_data = { - 'scanner': scanner_instance, - 'config': session_config, - 'created_at': time.time(), - 'last_activity': time.time(), - 'status': 'active' - } - - # Serialize the entire session data dictionary using pickle - serialized_data = pickle.dumps(session_data) - - # Store in Redis - session_key = self._get_session_key(session_id) - self.redis_client.setex(session_key, self.session_timeout, serialized_data) - - # Initialize stop signal as False - stop_key = self._get_stop_signal_key(session_id) - self.redis_client.setex(stop_key, self.session_timeout, b'0') - - print(f"Session {session_id} stored in Redis with stop signal initialized") - return session_id - - except Exception as e: - print(f"ERROR: Failed to create session {session_id}: {e}") - raise + try: + from core.session_config import create_session_config + session_config = create_session_config() + scanner_instance = Scanner(session_config=session_config) + + # Set the session ID on the scanner for cross-process stop signal management + scanner_instance.session_id = session_id + + session_data = { + 'scanner': scanner_instance, + 'config': session_config, + 'created_at': time.time(), + 'last_activity': time.time(), + 'status': 'active' + } + + # Serialize the entire session data dictionary using pickle + serialized_data = pickle.dumps(session_data) + + # Store in Redis + session_key = self._get_session_key(session_id) + self.redis_client.setex(session_key, self.session_timeout, serialized_data) + + # Initialize stop signal as False + stop_key = self._get_stop_signal_key(session_id) + self.redis_client.setex(stop_key, self.session_timeout, b'0') + + print(f"Session {session_id} stored in Redis with stop signal initialized") + print(f"Session has {len(scanner_instance.providers)} providers: {[p.get_name() for p in scanner_instance.providers]}") + return session_id + + except Exception as e: + print(f"ERROR: Failed to create session {session_id}: {e}") + raise def set_stop_signal(self, session_id: str) -> bool: """ @@ -212,7 +215,14 @@ class SessionManager: # Immediately save to Redis for GUI updates success = self._save_session_data(session_id, session_data) if success: - print(f"Scanner state updated for session {session_id} (status: {scanner.status})") + # Only log occasionally to reduce noise + if hasattr(self, '_last_update_log'): + if time.time() - self._last_update_log > 5: # Log every 5 seconds max + #print(f"Scanner state updated for session {session_id} (status: {scanner.status})") + self._last_update_log = time.time() + else: + #print(f"Scanner state updated for session {session_id} (status: {scanner.status})") + self._last_update_log = time.time() else: print(f"WARNING: Failed to save scanner state for session {session_id}") return success diff --git a/providers/base_provider.py b/providers/base_provider.py index 7941fb6..d326def 100644 --- a/providers/base_provider.py +++ b/providers/base_provider.py @@ -4,16 +4,17 @@ import time import requests import 
threading from abc import ABC, abstractmethod -from typing import List, Dict, Any, Optional, Tuple +from typing import Dict, Any, Optional from core.logger import get_forensic_logger from core.rate_limiter import GlobalRateLimiter +from core.provider_result import ProviderResult class BaseProvider(ABC): """ Abstract base class for all DNSRecon data providers. - Now supports session-specific configuration. + Now supports session-specific configuration and returns standardized ProviderResult objects. """ def __init__(self, name: str, rate_limit: int = 60, timeout: int = 30, session_config=None): @@ -101,7 +102,7 @@ class BaseProvider(ABC): pass @abstractmethod - def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + def query_domain(self, domain: str) -> ProviderResult: """ Query the provider for information about a domain. @@ -109,12 +110,12 @@ class BaseProvider(ABC): domain: Domain to investigate Returns: - List of tuples: (source_node, target_node, relationship_type, confidence, raw_data) + ProviderResult containing standardized attributes and relationships """ pass @abstractmethod - def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + def query_ip(self, ip: str) -> ProviderResult: """ Query the provider for information about an IP address. @@ -122,7 +123,7 @@ class BaseProvider(ABC): ip: IP address to investigate Returns: - List of tuples: (source_node, target_node, relationship_type, confidence, raw_data) + ProviderResult containing standardized attributes and relationships """ pass @@ -132,6 +133,8 @@ class BaseProvider(ABC): target_indicator: str = "") -> Optional[requests.Response]: """ Make a rate-limited HTTP request. + FIXED: Returns response without automatically raising HTTPError exceptions. + Individual providers should handle status codes appropriately. """ if self._is_stop_requested(): print(f"Request cancelled before start: {url}") @@ -168,8 +171,14 @@ class BaseProvider(ABC): raise ValueError(f"Unsupported HTTP method: {method}") print(f"Response status: {response.status_code}") - response.raise_for_status() - self.successful_requests += 1 + + # FIXED: Don't automatically raise for HTTP error status codes + # Let individual providers handle status codes appropriately + # Only count 2xx responses as successful + if 200 <= response.status_code < 300: + self.successful_requests += 1 + else: + self.failed_requests += 1 duration_ms = (time.time() - start_time) * 1000 self.logger.log_api_request( diff --git a/providers/crtsh_provider.py b/providers/crtsh_provider.py index bfa2c51..731cfd2 100644 --- a/providers/crtsh_provider.py +++ b/providers/crtsh_provider.py @@ -2,21 +2,22 @@ import json import re -import os from pathlib import Path -from typing import List, Dict, Any, Tuple, Set +from typing import List, Dict, Any, Set from urllib.parse import quote from datetime import datetime, timezone import requests from .base_provider import BaseProvider +from core.provider_result import ProviderResult from utils.helpers import _is_valid_domain class CrtShProvider(BaseProvider): """ Provider for querying crt.sh certificate transparency database. - Now uses session-specific configuration and caching with accumulative behavior. + FIXED: Now properly creates domain and CA nodes instead of large entities. + Returns standardized ProviderResult objects with caching support. 
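Providers now fill a ProviderResult instead of returning relationship tuples. A small sketch of that contract, assuming only the calls visible in this changeset (add_relationship, add_attribute, get_relationship_count, the attributes list) and using illustrative node names and values:

from core.provider_result import ProviderResult

def build_example_result() -> ProviderResult:
    # A provider no longer returns a list of tuples; it fills a ProviderResult instead.
    result = ProviderResult()
    result.add_relationship(
        source_node="example.com",
        target_node="www.example.com",
        relationship_type="dns_cname_record",
        provider="dns",
        confidence=0.8,
        raw_data={"query_type": "CNAME"},
    )
    result.add_attribute(
        target_node="example.com",
        name="a_records",
        value=["93.184.216.34"],
        attr_type="dns_record_list",
        provider="dns",
        confidence=0.8,
        metadata={"record_type": "A"},
    )
    return result

result = build_example_result()
print(result.get_relationship_count(), len(result.attributes))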
""" def __init__(self, name=None, session_config=None): @@ -30,9 +31,12 @@ class CrtShProvider(BaseProvider): self.base_url = "https://crt.sh/" self._stop_event = None - # Initialize cache directory - self.cache_dir = Path('cache') / 'crtsh' - self.cache_dir.mkdir(parents=True, exist_ok=True) + # Initialize cache directory (separate from BaseProvider's HTTP cache) + self.domain_cache_dir = Path('cache') / 'crtsh' + self.domain_cache_dir.mkdir(parents=True, exist_ok=True) + + # Compile regex for date filtering for efficiency + self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}') def get_name(self) -> str: """Return the provider name.""" @@ -51,18 +55,13 @@ class CrtShProvider(BaseProvider): return {'domains': True, 'ips': False} def is_available(self) -> bool: - """ - Check if the provider is configured to be used. - This method is intentionally simple and does not perform a network request - to avoid blocking application startup. - """ + """Check if the provider is configured to be used.""" return True def _get_cache_file_path(self, domain: str) -> Path: """Generate cache file path for a domain.""" - # Sanitize domain for filename safety safe_domain = domain.replace('.', '_').replace('/', '_').replace('\\', '_') - return self.cache_dir / f"{safe_domain}.json" + return self.domain_cache_dir / f"{safe_domain}.json" def _get_cache_status(self, cache_file_path: Path) -> str: """ @@ -78,7 +77,7 @@ class CrtShProvider(BaseProvider): last_query_str = cache_data.get("last_upstream_query") if not last_query_str: - return "stale" # Invalid cache format + return "stale" last_query = datetime.fromisoformat(last_query_str.replace('Z', '+00:00')) hours_since_query = (datetime.now(timezone.utc) - last_query).total_seconds() / 3600 @@ -92,160 +91,323 @@ class CrtShProvider(BaseProvider): except (json.JSONDecodeError, ValueError, KeyError) as e: self.logger.logger.warning(f"Invalid cache file format for {cache_file_path}: {e}") return "stale" - - def _load_cached_certificates(self, cache_file_path: Path) -> List[Dict[str, Any]]: - """Load certificates from cache file.""" + + def query_domain(self, domain: str) -> ProviderResult: + """ + FIXED: Query crt.sh for certificates containing the domain. + Now properly creates domain and CA nodes instead of large entities. 
+ + Args: + domain: Domain to investigate + + Returns: + ProviderResult containing discovered relationships and attributes + """ + if not _is_valid_domain(domain): + return ProviderResult() + + if self._stop_event and self._stop_event.is_set(): + return ProviderResult() + + cache_file = self._get_cache_file_path(domain) + cache_status = self._get_cache_status(cache_file) + + result = ProviderResult() + + try: + if cache_status == "fresh": + result = self._load_from_cache(cache_file) + self.logger.logger.info(f"Using fresh cached crt.sh data for {domain}") + + else: # "stale" or "not_found" + # Query the API for the latest certificates + new_raw_certs = self._query_crtsh_api(domain) + + if self._stop_event and self._stop_event.is_set(): + return ProviderResult() + + # Combine with old data if cache is stale + if cache_status == "stale": + old_raw_certs = self._load_raw_data_from_cache(cache_file) + combined_certs = old_raw_certs + new_raw_certs + + # Deduplicate the combined list + seen_ids = set() + unique_certs = [] + for cert in combined_certs: + cert_id = cert.get('id') + if cert_id not in seen_ids: + unique_certs.append(cert) + seen_ids.add(cert_id) + + raw_certificates_to_process = unique_certs + self.logger.logger.info(f"Refreshed and merged cache for {domain}. Total unique certs: {len(raw_certificates_to_process)}") + else: # "not_found" + raw_certificates_to_process = new_raw_certs + + # FIXED: Process certificates to create proper domain and CA nodes + result = self._process_certificates_to_result_fixed(domain, raw_certificates_to_process) + self.logger.logger.info(f"Created fresh result for {domain} ({result.get_relationship_count()} relationships)") + + # Save the new result and the raw data to the cache + self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain) + + except requests.exceptions.RequestException as e: + self.logger.logger.error(f"API query failed for {domain}: {e}") + if cache_status != "not_found": + result = self._load_from_cache(cache_file) + self.logger.logger.warning(f"Using stale cache for {domain} due to API failure.") + else: + raise e # Re-raise if there's no cache to fall back on + + return result + + def query_ip(self, ip: str) -> ProviderResult: + """ + Query crt.sh for certificates containing the IP address. + Note: crt.sh doesn't typically index by IP, so this returns empty results. 
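The stale-cache branch above merges old and new crt.sh rows and deduplicates them by certificate id. The same step in isolation, with toy rows:

def merge_certificates(cached, fresh):
    """Mirror of the stale-cache merge: combine old and new crt.sh rows,
    keeping the first occurrence of each certificate 'id'."""
    seen_ids = set()
    unique = []
    for cert in cached + fresh:
        cert_id = cert.get("id")
        if cert_id not in seen_ids:
            unique.append(cert)
            seen_ids.add(cert_id)
    return unique

old = [{"id": 1, "common_name": "example.com"}]
new = [{"id": 1, "common_name": "example.com"}, {"id": 2, "common_name": "www.example.com"}]
assert [c["id"] for c in merge_certificates(old, new)] == [1, 2]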
+ + Args: + ip: IP address to investigate + + Returns: + Empty ProviderResult (crt.sh doesn't support IP-based certificate queries effectively) + """ + return ProviderResult() + + def _load_from_cache(self, cache_file_path: Path) -> ProviderResult: + """Load processed crt.sh data from a cache file.""" try: with open(cache_file_path, 'r') as f: - cache_data = json.load(f) - return cache_data.get('certificates', []) + cache_content = json.load(f) + + result = ProviderResult() + + # Reconstruct relationships + for rel_data in cache_content.get("relationships", []): + result.add_relationship( + source_node=rel_data["source_node"], + target_node=rel_data["target_node"], + relationship_type=rel_data["relationship_type"], + provider=rel_data["provider"], + confidence=rel_data["confidence"], + raw_data=rel_data.get("raw_data", {}) + ) + + # Reconstruct attributes + for attr_data in cache_content.get("attributes", []): + result.add_attribute( + target_node=attr_data["target_node"], + name=attr_data["name"], + value=attr_data["value"], + attr_type=attr_data["type"], + provider=attr_data["provider"], + confidence=attr_data["confidence"], + metadata=attr_data.get("metadata", {}) + ) + + return result + except (json.JSONDecodeError, FileNotFoundError, KeyError) as e: self.logger.logger.error(f"Failed to load cached certificates from {cache_file_path}: {e}") + return ProviderResult() + + def _load_raw_data_from_cache(self, cache_file_path: Path) -> List[Dict[str, Any]]: + """Load only the raw certificate data from a cache file.""" + try: + with open(cache_file_path, 'r') as f: + cache_content = json.load(f) + return cache_content.get("raw_certificates", []) + except (json.JSONDecodeError, FileNotFoundError): return [] - + + def _save_result_to_cache(self, cache_file_path: Path, result: ProviderResult, raw_certificates: List[Dict[str, Any]], domain: str) -> None: + """Save processed crt.sh result and raw data to a cache file.""" + try: + cache_data = { + "domain": domain, + "last_upstream_query": datetime.now(timezone.utc).isoformat(), + "raw_certificates": raw_certificates, # Store the raw data for deduplication + "relationships": [ + { + "source_node": rel.source_node, + "target_node": rel.target_node, + "relationship_type": rel.relationship_type, + "confidence": rel.confidence, + "provider": rel.provider, + "raw_data": rel.raw_data + } for rel in result.relationships + ], + "attributes": [ + { + "target_node": attr.target_node, + "name": attr.name, + "value": attr.value, + "type": attr.type, + "provider": attr.provider, + "confidence": attr.confidence, + "metadata": attr.metadata + } for attr in result.attributes + ] + } + cache_file_path.parent.mkdir(parents=True, exist_ok=True) + with open(cache_file_path, 'w') as f: + json.dump(cache_data, f, separators=(',', ':'), default=str) + except Exception as e: + self.logger.logger.warning(f"Failed to save cache file for {domain}: {e}") + def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]: - """ - Query crt.sh API for raw certificate data. - Raises exceptions for network errors to allow core logic to retry. 
- """ + """Query crt.sh API for raw certificate data.""" url = f"{self.base_url}?q={quote(domain)}&output=json" response = self.make_request(url, target_indicator=domain) if not response or response.status_code != 200: - # This could be a temporary error - raise exception so core can retry raise requests.exceptions.RequestException(f"crt.sh API returned status {response.status_code if response else 'None'}") - certificates = response.json() + try: + certificates = response.json() + except json.JSONDecodeError: + self.logger.logger.error(f"crt.sh returned invalid JSON for {domain}") + return [] + if not certificates: return [] return certificates - - def _parse_issuer_organization(self, issuer_dn: str) -> str: + + def _process_certificates_to_result_fixed(self, query_domain: str, certificates: List[Dict[str, Any]]) -> ProviderResult: """ - Parse the issuer Distinguished Name to extract just the organization name. + FIXED: Process certificates to create proper domain and CA nodes. + Now creates individual domain nodes instead of large entities. + """ + result = ProviderResult() + + if self._stop_event and self._stop_event.is_set(): + self.logger.logger.info(f"CrtSh processing cancelled before processing for domain: {query_domain}") + return result + + all_discovered_domains = set() + processed_issuers = set() + + for i, cert_data in enumerate(certificates): + if i % 10 == 0 and self._stop_event and self._stop_event.is_set(): + self.logger.logger.info(f"CrtSh processing cancelled at certificate {i} for domain: {query_domain}") + break + + # Extract all domains from this certificate + cert_domains = self._extract_domains_from_certificate(cert_data) + all_discovered_domains.update(cert_domains) + + # FIXED: Create CA nodes for certificate issuers (not as domain metadata) + issuer_name = self._parse_issuer_organization(cert_data.get('issuer_name', '')) + if issuer_name and issuer_name not in processed_issuers: + # Create relationship from query domain to CA + result.add_relationship( + source_node=query_domain, + target_node=issuer_name, + relationship_type='crtsh_cert_issuer', + provider=self.name, + confidence=0.95, + raw_data={'issuer_dn': cert_data.get('issuer_name', '')} + ) + processed_issuers.add(issuer_name) + + # Add certificate metadata to each domain in this certificate + cert_metadata = self._extract_certificate_metadata(cert_data) + for cert_domain in cert_domains: + if not _is_valid_domain(cert_domain): + continue + + # Add certificate attributes to the domain + for key, value in cert_metadata.items(): + if value is not None: + result.add_attribute( + target_node=cert_domain, + name=f"cert_{key}", + value=value, + attr_type='certificate_data', + provider=self.name, + confidence=0.9, + metadata={'certificate_id': cert_data.get('id')} + ) + + if self._stop_event and self._stop_event.is_set(): + self.logger.logger.info(f"CrtSh query cancelled before relationship creation for domain: {query_domain}") + return result + + # FIXED: Create selective relationships to avoid large entities + # Only create relationships to domains that are closely related + for discovered_domain in all_discovered_domains: + if discovered_domain == query_domain: + continue + + if not _is_valid_domain(discovered_domain): + continue + + # FIXED: Only create relationships for domains that share a meaningful connection + # This prevents creating too many relationships that trigger large entity creation + if self._should_create_relationship(query_domain, discovered_domain): + confidence = 
self._calculate_domain_relationship_confidence( + query_domain, discovered_domain, [], all_discovered_domains + ) + + result.add_relationship( + source_node=query_domain, + target_node=discovered_domain, + relationship_type='crtsh_san_certificate', + provider=self.name, + confidence=confidence, + raw_data={'relationship_type': 'certificate_discovery'} + ) + + self.log_relationship_discovery( + source_node=query_domain, + target_node=discovered_domain, + relationship_type='crtsh_san_certificate', + confidence_score=confidence, + raw_data={'relationship_type': 'certificate_discovery'}, + discovery_method="certificate_transparency_analysis" + ) + + self.logger.logger.info(f"CrtSh processing completed for {query_domain}: {len(all_discovered_domains)} domains, {result.get_relationship_count()} relationships") + return result + + def _should_create_relationship(self, source_domain: str, target_domain: str) -> bool: + """ + FIXED: Determine if a relationship should be created between two domains. + This helps avoid creating too many relationships that trigger large entity creation. + """ + # Always create relationships for subdomains + if target_domain.endswith(f'.{source_domain}') or source_domain.endswith(f'.{target_domain}'): + return True - Args: - issuer_dn: Full issuer DN string (e.g., "C=US, O=Let's Encrypt, CN=R11") - - Returns: - Organization name (e.g., "Let's Encrypt") or original string if parsing fails - """ - if not issuer_dn: - return issuer_dn + # Create relationships for domains that share a common parent (up to 2 levels) + source_parts = source_domain.split('.') + target_parts = target_domain.split('.') - try: - # Split by comma and look for O= component - components = [comp.strip() for comp in issuer_dn.split(',')] - - for component in components: - if component.startswith('O='): - # Extract the value after O= - org_name = component[2:].strip() - # Remove quotes if present - if org_name.startswith('"') and org_name.endswith('"'): - org_name = org_name[1:-1] - return org_name - - # If no O= component found, return the original string - return issuer_dn - - except Exception as e: - self.logger.logger.debug(f"Failed to parse issuer DN '{issuer_dn}': {e}") - return issuer_dn - - def _parse_certificate_date(self, date_string: str) -> datetime: - """ - Parse certificate date from crt.sh format. - - Args: - date_string: Date string from crt.sh API - - Returns: - Parsed datetime object in UTC - """ - if not date_string: - raise ValueError("Empty date string") - - try: - # Handle various possible formats from crt.sh - if date_string.endswith('Z'): - return datetime.fromisoformat(date_string[:-1]).replace(tzinfo=timezone.utc) - elif '+' in date_string or date_string.endswith('UTC'): - # Handle timezone-aware strings - date_string = date_string.replace('UTC', '').strip() - if '+' in date_string: - date_string = date_string.split('+')[0] - return datetime.fromisoformat(date_string).replace(tzinfo=timezone.utc) - else: - # Assume UTC if no timezone specified - return datetime.fromisoformat(date_string).replace(tzinfo=timezone.utc) - except Exception as e: - # Fallback: try parsing without timezone info and assume UTC - try: - return datetime.strptime(date_string[:19], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc) - except Exception: - raise ValueError(f"Unable to parse date: {date_string}") from e - - def _is_cert_valid(self, cert_data: Dict[str, Any]) -> bool: - """ - Check if a certificate is currently valid based on its expiry date. 
- - Args: - cert_data: Certificate data from crt.sh - - Returns: - True if certificate is currently valid (not expired) - """ - try: - not_after_str = cert_data.get('not_after') - if not not_after_str: - return False - - not_after_date = self._parse_certificate_date(not_after_str) - not_before_str = cert_data.get('not_before') - - now = datetime.now(timezone.utc) - - # Check if certificate is within valid date range - is_not_expired = not_after_date > now - - if not_before_str: - not_before_date = self._parse_certificate_date(not_before_str) - is_not_before_valid = not_before_date <= now - return is_not_expired and is_not_before_valid - - return is_not_expired - - except Exception as e: - self.logger.logger.debug(f"Certificate validity check failed: {e}") - return False + # Check if they share the same root domain (last 2 parts) + if len(source_parts) >= 2 and len(target_parts) >= 2: + source_root = '.'.join(source_parts[-2:]) + target_root = '.'.join(target_parts[-2:]) + return source_root == target_root + + return False def _extract_certificate_metadata(self, cert_data: Dict[str, Any]) -> Dict[str, Any]: - """ - Extract comprehensive metadata from certificate data. - - Args: - cert_data: Raw certificate data from crt.sh - - Returns: - Comprehensive certificate metadata dictionary - """ - # Parse the issuer name to get just the organization + """Extract comprehensive metadata from certificate data.""" raw_issuer_name = cert_data.get('issuer_name', '') parsed_issuer_name = self._parse_issuer_organization(raw_issuer_name) metadata = { 'certificate_id': cert_data.get('id'), 'serial_number': cert_data.get('serial_number'), - 'issuer_name': parsed_issuer_name, # Use parsed organization name - #'issuer_name_full': raw_issuer_name, # deliberately left out, because its not useful in most cases + 'issuer_name': parsed_issuer_name, 'issuer_ca_id': cert_data.get('issuer_ca_id'), 'common_name': cert_data.get('common_name'), 'not_before': cert_data.get('not_before'), 'not_after': cert_data.get('not_after'), 'entry_timestamp': cert_data.get('entry_timestamp'), - 'source': 'crt.sh' + 'source': 'crtsh' } try: @@ -257,9 +419,9 @@ class CrtShProvider(BaseProvider): metadata['is_currently_valid'] = self._is_cert_valid(cert_data) metadata['expires_soon'] = (not_after - datetime.now(timezone.utc)).days <= 30 - # Add human-readable dates - metadata['not_before'] = not_before.strftime('%Y-%m-%d %H:%M:%S UTC') - metadata['not_after'] = not_after.strftime('%Y-%m-%d %H:%M:%S UTC') + # Keep raw date format or convert to standard format + metadata['not_before'] = not_before.isoformat() + metadata['not_after'] = not_after.isoformat() except Exception as e: self.logger.logger.debug(f"Error computing certificate metadata: {e}") @@ -268,422 +430,73 @@ class CrtShProvider(BaseProvider): return metadata - def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: - """ - Query crt.sh for certificates containing the domain with caching support. - Properly raises exceptions for network errors to allow core logic retries. 
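The helper below mirrors _should_create_relationship with its two rules (subdomains in either direction, or a shared two-label root), shown against illustrative domain pairs:

def should_link(source_domain: str, target_domain: str) -> bool:
    """Mirror of _should_create_relationship: link subdomains and domains
    that share the same two-label root, and nothing else."""
    if target_domain.endswith(f".{source_domain}") or source_domain.endswith(f".{target_domain}"):
        return True
    source_parts, target_parts = source_domain.split("."), target_domain.split(".")
    if len(source_parts) >= 2 and len(target_parts) >= 2:
        return ".".join(source_parts[-2:]) == ".".join(target_parts[-2:])
    return False

assert should_link("example.com", "api.example.com") is True      # subdomain
assert should_link("mail.example.com", "www.example.com") is True  # shared root example.com
assert should_link("example.com", "example.org") is False          # unrelated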
- """ - if not _is_valid_domain(domain): - return [] + def _parse_issuer_organization(self, issuer_dn: str) -> str: + """Parse the issuer Distinguished Name to extract just the organization name.""" + if not issuer_dn: + return issuer_dn - if self._stop_event and self._stop_event.is_set(): - return [] + try: + components = [comp.strip() for comp in issuer_dn.split(',')] + + for component in components: + if component.startswith('O='): + org_name = component[2:].strip() + if org_name.startswith('"') and org_name.endswith('"'): + org_name = org_name[1:-1] + return org_name + + return issuer_dn + + except Exception as e: + self.logger.logger.debug(f"Failed to parse issuer DN '{issuer_dn}': {e}") + return issuer_dn - cache_file = self._get_cache_file_path(domain) - cache_status = self._get_cache_status(cache_file) - - processed_certificates = [] + def _parse_certificate_date(self, date_string: str) -> datetime: + """Parse certificate date from crt.sh format.""" + if not date_string: + raise ValueError("Empty date string") try: - if cache_status == "fresh": - processed_certificates = self._load_cached_certificates(cache_file) - self.logger.logger.info(f"Using cached processed data for {domain} ({len(processed_certificates)} certificates)") - - else: # "stale" or "not_found" - raw_certificates = self._query_crtsh_api(domain) - - if self._stop_event and self._stop_event.is_set(): - return [] - - # Process raw data into the application's expected format - current_processed_certs = [self._extract_certificate_metadata(cert) for cert in raw_certificates] - - if cache_status == "stale": - # Append new processed certs to existing ones - processed_certificates = self._append_to_cache(cache_file, current_processed_certs) - self.logger.logger.info(f"Refreshed and appended cache for {domain}") - else: # "not_found" - # Create a new cache file with the processed certs, even if empty - self._create_cache_file(cache_file, domain, current_processed_certs) - processed_certificates = current_processed_certs - self.logger.logger.info(f"Cached fresh data for {domain} ({len(processed_certificates)} certificates)") - - - except requests.exceptions.RequestException as e: - self.logger.logger.error(f"API query failed for {domain}: {e}") - if cache_status != "not_found": - processed_certificates = self._load_cached_certificates(cache_file) - self.logger.logger.warning(f"Using stale cache for {domain} due to API failure.") + if date_string.endswith('Z'): + return datetime.fromisoformat(date_string[:-1]).replace(tzinfo=timezone.utc) + elif '+' in date_string or date_string.endswith('UTC'): + date_string = date_string.replace('UTC', '').strip() + if '+' in date_string: + date_string = date_string.split('+')[0] + return datetime.fromisoformat(date_string).replace(tzinfo=timezone.utc) else: - raise e # Re-raise if there's no cache to fall back on - - if not processed_certificates: - return [] - - return self._process_certificates_to_relationships(domain, processed_certificates) - - def _create_cache_file(self, cache_file_path: Path, domain: str, processed_certificates: List[Dict[str, Any]]) -> None: - """Create new cache file with processed certificates.""" - try: - cache_data = { - "domain": domain, - "last_upstream_query": datetime.now(timezone.utc).isoformat(), - "certificates": processed_certificates # Store processed data - } - cache_file_path.parent.mkdir(parents=True, exist_ok=True) - with open(cache_file_path, 'w') as f: - json.dump(cache_data, f, separators=(',', ':')) + return 
datetime.fromisoformat(date_string).replace(tzinfo=timezone.utc) except Exception as e: - self.logger.logger.warning(f"Failed to create cache file for {domain}: {e}") - - def _append_to_cache(self, cache_file_path: Path, new_processed_certificates: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Append new processed certificates to existing cache and return all certificates.""" - try: - with open(cache_file_path, 'r') as f: - cache_data = json.load(f) - - existing_ids = {cert.get('certificate_id') for cert in cache_data.get('certificates', [])} - - for cert in new_processed_certificates: - if cert.get('certificate_id') not in existing_ids: - cache_data['certificates'].append(cert) - - cache_data['last_upstream_query'] = datetime.now(timezone.utc).isoformat() - - with open(cache_file_path, 'w') as f: - json.dump(cache_data, f, separators=(',', ':')) - - return cache_data['certificates'] - except Exception as e: - self.logger.logger.warning(f"Failed to append to cache: {e}") - return new_processed_certificates - - def _process_certificates_to_relationships(self, domain: str, certificates: List[Dict[str, Any]]) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: - """ - Process certificates to relationships using existing logic. - This method contains the original processing logic from query_domain. - """ - relationships = [] - - # Check for cancellation before processing - if self._stop_event and self._stop_event.is_set(): - print(f"CrtSh processing cancelled before processing for domain: {domain}") - return [] - - # Aggregate certificate data by domain - domain_certificates = {} - all_discovered_domains = set() - - # Process certificates with cancellation checking - for i, cert_data in enumerate(certificates): - # Check for cancellation every 5 certificates for faster response - if i % 5 == 0 and self._stop_event and self._stop_event.is_set(): - print(f"CrtSh processing cancelled at certificate {i} for domain: {domain}") - break - - cert_metadata = self._extract_certificate_metadata(cert_data) - cert_domains = self._extract_domains_from_certificate(cert_data) - - # Add all domains from this certificate to our tracking - all_discovered_domains.update(cert_domains) - for cert_domain in cert_domains: - if not _is_valid_domain(cert_domain): - continue - - # Initialize domain certificate list if needed - if cert_domain not in domain_certificates: - domain_certificates[cert_domain] = [] - - # Add this certificate to the domain's certificate list - domain_certificates[cert_domain].append(cert_metadata) - - # Final cancellation check before creating relationships - if self._stop_event and self._stop_event.is_set(): - print(f"CrtSh query cancelled before relationship creation for domain: {domain}") - return [] - - # Create relationships from query domain to ALL discovered domains with stop checking - for i, discovered_domain in enumerate(all_discovered_domains): - if discovered_domain == domain: - continue # Skip self-relationships - - # Check for cancellation every 10 relationships - if i % 10 == 0 and self._stop_event and self._stop_event.is_set(): - print(f"CrtSh relationship creation cancelled for domain: {domain}") - break - - if not _is_valid_domain(discovered_domain): - continue - - # Get certificates for both domains - query_domain_certs = domain_certificates.get(domain, []) - discovered_domain_certs = domain_certificates.get(discovered_domain, []) - - # Find shared certificates (for metadata purposes) - shared_certificates = self._find_shared_certificates(query_domain_certs, 
discovered_domain_certs) - - # Calculate confidence based on relationship type and shared certificates - confidence = self._calculate_domain_relationship_confidence( - domain, discovered_domain, shared_certificates, all_discovered_domains - ) - - # Create comprehensive raw data for the relationship - relationship_raw_data = { - 'relationship_type': 'certificate_discovery', - 'shared_certificates': shared_certificates, - 'total_shared_certs': len(shared_certificates), - 'discovery_context': self._determine_relationship_context(discovered_domain, domain), - 'domain_certificates': { - domain: self._summarize_certificates(query_domain_certs), - discovered_domain: self._summarize_certificates(discovered_domain_certs) - } - } - - # Create domain -> domain relationship - relationships.append(( - domain, - discovered_domain, - 'san_certificate', - confidence, - relationship_raw_data - )) - - # Log the relationship discovery - self.log_relationship_discovery( - source_node=domain, - target_node=discovered_domain, - relationship_type='san_certificate', - confidence_score=confidence, - raw_data=relationship_raw_data, - discovery_method="certificate_transparency_analysis" - ) - - return relationships - - def _find_shared_certificates(self, certs1: List[Dict[str, Any]], certs2: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Find certificates that are shared between two domain certificate lists. - - Args: - certs1: First domain's certificates - certs2: Second domain's certificates - - Returns: - List of shared certificate metadata - """ - shared = [] - - # Create a set of certificate IDs from the first list for quick lookup - cert1_ids = set() - for cert in certs1: - cert_id = cert.get('certificate_id') - # Ensure the ID is not None and is a hashable type before adding to the set - if cert_id and isinstance(cert_id, (int, str, float, bool, tuple)): - cert1_ids.add(cert_id) - - # Find certificates in the second list that match - for cert in certs2: - cert_id = cert.get('certificate_id') - if cert_id and isinstance(cert_id, (int, str, float, bool, tuple)): - if cert_id in cert1_ids: - shared.append(cert) - - return shared - - def _summarize_certificates(self, certificates: List[Dict[str, Any]]) -> Dict[str, Any]: - """ - Create a summary of certificates for a domain. 
- - Args: - certificates: List of certificate metadata - - Returns: - Summary dictionary with aggregate statistics - """ - if not certificates: - return { - 'total_certificates': 0, - 'valid_certificates': 0, - 'expired_certificates': 0, - 'expires_soon_count': 0, - 'unique_issuers': [], - 'latest_certificate': None, - 'has_valid_cert': False, - 'certificate_details': [] # Always include empty list - } - - valid_count = sum(1 for cert in certificates if cert.get('is_currently_valid')) - expired_count = len(certificates) - valid_count - expires_soon_count = sum(1 for cert in certificates if cert.get('expires_soon')) - - # Get unique issuers (using parsed organization names) - unique_issuers = list(set(cert.get('issuer_name') for cert in certificates if cert.get('issuer_name'))) - - # Find the most recent certificate - latest_cert = None - latest_date = None - - for cert in certificates: try: - if cert.get('not_before'): - cert_date = self._parse_certificate_date(cert['not_before']) - if latest_date is None or cert_date > latest_date: - latest_date = cert_date - latest_cert = cert + return datetime.strptime(date_string[:19], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc) except Exception: - continue - - # Sort certificates by date for better display (newest first) - sorted_certificates = sorted( - certificates, - key=lambda c: self._get_certificate_sort_date(c), - reverse=True - ) - - return { - 'total_certificates': len(certificates), - 'valid_certificates': valid_count, - 'expired_certificates': expired_count, - 'expires_soon_count': expires_soon_count, - 'unique_issuers': unique_issuers, - 'latest_certificate': latest_cert, - 'has_valid_cert': valid_count > 0, - 'certificate_details': sorted_certificates # Include full certificate details - } + raise ValueError(f"Unable to parse date: {date_string}") from e - def _get_certificate_sort_date(self, cert: Dict[str, Any]) -> datetime: - """ - Get a sortable date from certificate data for chronological ordering. - - Args: - cert: Certificate metadata dictionary - - Returns: - Datetime object for sorting (falls back to epoch if parsing fails) - """ + def _is_cert_valid(self, cert_data: Dict[str, Any]) -> bool: + """Check if a certificate is currently valid based on its expiry date.""" try: - # Try not_before first (issue date) - if cert.get('not_before'): - return self._parse_certificate_date(cert['not_before']) - - # Fall back to entry_timestamp if available - if cert.get('entry_timestamp'): - return self._parse_certificate_date(cert['entry_timestamp']) - - # Last resort - return a very old date for certificates without dates - return datetime(1970, 1, 1, tzinfo=timezone.utc) - - except Exception: - # If all parsing fails, return epoch - return datetime(1970, 1, 1, tzinfo=timezone.utc) + not_after_str = cert_data.get('not_after') + if not not_after_str: + return False - def _calculate_domain_relationship_confidence(self, domain1: str, domain2: str, - shared_certificates: List[Dict[str, Any]], - all_discovered_domains: Set[str]) -> float: - """ - Calculate confidence score for domain relationship based on various factors. 
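A simplified standalone mirror of the date parsing and validity check, assuming only the plain and 'Z'-suffixed timestamp forms that crt.sh commonly returns:

from datetime import datetime, timezone

def parse_crtsh_date(value: str) -> datetime:
    # simplified mirror of _parse_certificate_date: crt.sh timestamps are treated as UTC
    if value.endswith("Z"):
        value = value[:-1]
    return datetime.fromisoformat(value).replace(tzinfo=timezone.utc)

def is_currently_valid(cert: dict) -> bool:
    # mirror of _is_cert_valid: the current time must fall inside the not_before/not_after window
    try:
        now = datetime.now(timezone.utc)
        not_after = parse_crtsh_date(cert["not_after"])
        not_before = cert.get("not_before")
        if not_before and parse_crtsh_date(not_before) > now:
            return False
        return not_after > now
    except (KeyError, ValueError):
        return False

print(is_currently_valid({"not_before": "2020-01-01T00:00:00", "not_after": "2030-01-01T00:00:00"}))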
- - Args: - domain1: Source domain (query domain) - domain2: Target domain (discovered domain) - shared_certificates: List of shared certificate metadata - all_discovered_domains: All domains discovered in this query - - Returns: - Confidence score between 0.0 and 1.0 - """ - base_confidence = 0.9 - - # Adjust confidence based on domain relationship context - relationship_context = self._determine_relationship_context(domain2, domain1) - - if relationship_context == 'exact_match': - context_bonus = 0.0 # This shouldn't happen, but just in case - elif relationship_context == 'subdomain': - context_bonus = 0.1 # High confidence for subdomains - elif relationship_context == 'parent_domain': - context_bonus = 0.05 # Medium confidence for parent domains - else: - context_bonus = 0.0 # Related domains get base confidence - - # Adjust confidence based on shared certificates - if shared_certificates: - shared_count = len(shared_certificates) - if shared_count >= 3: - shared_bonus = 0.1 - elif shared_count >= 2: - shared_bonus = 0.05 - else: - shared_bonus = 0.02 - - # Additional bonus for valid shared certificates - valid_shared = sum(1 for cert in shared_certificates if cert.get('is_currently_valid')) - if valid_shared > 0: - validity_bonus = 0.05 - else: - validity_bonus = 0.0 - else: - # Even without shared certificates, domains found in the same query have some relationship - shared_bonus = 0.0 - validity_bonus = 0.0 - - # Adjust confidence based on certificate issuer reputation (if shared certificates exist) - issuer_bonus = 0.0 - if shared_certificates: - for cert in shared_certificates: - issuer = cert.get('issuer_name', '').lower() - if any(trusted_ca in issuer for trusted_ca in ['let\'s encrypt', 'digicert', 'sectigo', 'globalsign']): - issuer_bonus = max(issuer_bonus, 0.03) - break - - # Calculate final confidence - final_confidence = base_confidence + context_bonus + shared_bonus + validity_bonus + issuer_bonus - return max(0.1, min(1.0, final_confidence)) # Clamp between 0.1 and 1.0 + not_after_date = self._parse_certificate_date(not_after_str) + not_before_str = cert_data.get('not_before') + + now = datetime.now(timezone.utc) + is_not_expired = not_after_date > now + + if not_before_str: + not_before_date = self._parse_certificate_date(not_before_str) + is_not_before_valid = not_before_date <= now + return is_not_expired and is_not_before_valid + + return is_not_expired + + except Exception as e: + return False - def _determine_relationship_context(self, cert_domain: str, query_domain: str) -> str: - """ - Determine the context of the relationship between certificate domain and query domain. - - Args: - cert_domain: Domain found in certificate - query_domain: Original query domain - - Returns: - String describing the relationship context - """ - if cert_domain == query_domain: - return 'exact_match' - elif cert_domain.endswith(f'.{query_domain}'): - return 'subdomain' - elif query_domain.endswith(f'.{cert_domain}'): - return 'parent_domain' - else: - return 'related_domain' - - def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: - """ - Query crt.sh for certificates containing the IP address. - Note: crt.sh doesn't typically index by IP, so this returns empty results. 
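For reference, the context classification that feeds the confidence bonus (0.9 base, +0.1 for subdomains, +0.05 for parent domains, clamped to 0.1..1.0) behaves like this standalone mirror:

def relationship_context(cert_domain: str, query_domain: str) -> str:
    # mirror of _determine_relationship_context
    if cert_domain == query_domain:
        return "exact_match"
    if cert_domain.endswith(f".{query_domain}"):
        return "subdomain"
    if query_domain.endswith(f".{cert_domain}"):
        return "parent_domain"
    return "related_domain"

assert relationship_context("api.example.com", "example.com") == "subdomain"
assert relationship_context("example.com", "api.example.com") == "parent_domain"
assert relationship_context("cdn.example.net", "example.com") == "related_domain"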
- - Args: - ip: IP address to investigate - - Returns: - Empty list (crt.sh doesn't support IP-based certificate queries effectively) - """ - # crt.sh doesn't effectively support IP-based certificate queries - return [] - def _extract_domains_from_certificate(self, cert_data: Dict[str, Any]) -> Set[str]: - """ - Extract all domains from certificate data. - - Args: - cert_data: Certificate data from crt.sh API - - Returns: - Set of unique domain names found in the certificate - """ + """Extract all domains from certificate data.""" domains = set() # Extract from common name @@ -696,50 +509,72 @@ class CrtShProvider(BaseProvider): # Extract from name_value field (contains SANs) name_value = cert_data.get('name_value', '') if name_value: - # Split by newlines and clean each domain for line in name_value.split('\n'): cleaned_domains = self._clean_domain_name(line.strip()) if cleaned_domains: domains.update(cleaned_domains) return domains - + def _clean_domain_name(self, domain_name: str) -> List[str]: - """ - Clean and normalize domain name from certificate data. - Now returns a list to handle wildcards correctly. - """ + """Clean and normalize domain name from certificate data.""" if not domain_name: return [] domain = domain_name.strip().lower() - # Remove protocol if present if domain.startswith(('http://', 'https://')): domain = domain.split('://', 1)[1] - # Remove path if present if '/' in domain: domain = domain.split('/', 1)[0] - # Remove port if present - if ':' in domain and not domain.count(':') > 1: # Avoid breaking IPv6 + if ':' in domain and not domain.count(':') > 1: domain = domain.split(':', 1)[0] - # Handle wildcard domains cleaned_domains = [] if domain.startswith('*.'): - # Add both the wildcard and the base domain cleaned_domains.append(domain) cleaned_domains.append(domain[2:]) else: cleaned_domains.append(domain) - # Remove any remaining invalid characters and validate final_domains = [] for d in cleaned_domains: d = re.sub(r'[^\w\-\.]', '', d) if d and not d.startswith(('.', '-')) and not d.endswith(('.', '-')): final_domains.append(d) - return [d for d in final_domains if _is_valid_domain(d)] \ No newline at end of file + return [d for d in final_domains if _is_valid_domain(d)] + + def _calculate_domain_relationship_confidence(self, domain1: str, domain2: str, + shared_certificates: List[Dict[str, Any]], + all_discovered_domains: Set[str]) -> float: + """Calculate confidence score for domain relationship based on various factors.""" + base_confidence = 0.9 + + # Adjust confidence based on domain relationship context + relationship_context = self._determine_relationship_context(domain2, domain1) + + if relationship_context == 'exact_match': + context_bonus = 0.0 + elif relationship_context == 'subdomain': + context_bonus = 0.1 + elif relationship_context == 'parent_domain': + context_bonus = 0.05 + else: + context_bonus = 0.0 + + final_confidence = base_confidence + context_bonus + return max(0.1, min(1.0, final_confidence)) + + def _determine_relationship_context(self, cert_domain: str, query_domain: str) -> str: + """Determine the context of the relationship between certificate domain and query domain.""" + if cert_domain == query_domain: + return 'exact_match' + elif cert_domain.endswith(f'.{query_domain}'): + return 'subdomain' + elif query_domain.endswith(f'.{cert_domain}'): + return 'parent_domain' + else: + return 'related_domain' \ No newline at end of file diff --git a/providers/dns_provider.py b/providers/dns_provider.py index d73ef6c..3aef192 100644 --- 
a/providers/dns_provider.py +++ b/providers/dns_provider.py @@ -1,15 +1,16 @@ # dnsrecon/providers/dns_provider.py from dns import resolver, reversename -from typing import List, Dict, Any, Tuple +from typing import Dict from .base_provider import BaseProvider -from utils.helpers import _is_valid_ip, _is_valid_domain +from core.provider_result import ProviderResult +from utils.helpers import _is_valid_ip, _is_valid_domain, get_ip_version class DNSProvider(BaseProvider): """ Provider for standard DNS resolution and reverse DNS lookups. - Now uses session-specific configuration. + Now returns standardized ProviderResult objects with IPv4 and IPv6 support. """ def __init__(self, name=None, session_config=None): @@ -25,7 +26,6 @@ class DNSProvider(BaseProvider): self.resolver = resolver.Resolver() self.resolver.timeout = 5 self.resolver.lifetime = 10 - #self.resolver.nameservers = ['127.0.0.1'] def get_name(self) -> str: """Return the provider name.""" @@ -47,80 +47,118 @@ class DNSProvider(BaseProvider): """DNS is always available - no API key required.""" return True - def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + def query_domain(self, domain: str) -> ProviderResult: """ - Query DNS records for the domain to discover relationships. - ... + Query DNS records for the domain to discover relationships and attributes. + FIXED: Now creates separate attributes for each DNS record type. + + Args: + domain: Domain to investigate + + Returns: + ProviderResult containing discovered relationships and attributes """ if not _is_valid_domain(domain): - return [] + return ProviderResult() - relationships = [] + result = ProviderResult() - # Query all record types + # Query all record types - each gets its own attribute for record_type in ['A', 'AAAA', 'CNAME', 'MX', 'NS', 'SOA', 'TXT', 'SRV', 'CAA']: try: - relationships.extend(self._query_record(domain, record_type)) - except resolver.NoAnswer: + self._query_record(domain, record_type, result) + #except resolver.NoAnswer: # This is not an error, just a confirmation that the record doesn't exist. - self.logger.logger.debug(f"No {record_type} record found for {domain}") + #self.logger.logger.debug(f"No {record_type} record found for {domain}") except Exception as e: self.failed_requests += 1 self.logger.logger.debug(f"{record_type} record query failed for {domain}: {e}") - # Optionally, you might want to re-raise other, more serious exceptions. - return relationships + return result - def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + def query_ip(self, ip: str) -> ProviderResult: """ - Query reverse DNS for the IP address. + Query reverse DNS for the IP address (supports both IPv4 and IPv6). 
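A minimal dnspython sketch of the per-record-type walk; example.com is an illustrative target, and the label shown follows the dns_<type>_record relationship convention used in this changeset:

from dns import resolver

# the record types the provider walks
RECORD_TYPES = ['A', 'AAAA', 'CNAME', 'MX', 'NS', 'SOA', 'TXT', 'SRV', 'CAA']

res = resolver.Resolver()
res.timeout, res.lifetime = 5, 10           # same timeouts the provider configures

domain = "example.com"                      # illustrative target
for record_type in ['A', 'MX']:
    try:
        answer = res.resolve(domain, record_type)
    except (resolver.NoAnswer, resolver.NXDOMAIN):
        continue                            # a missing record type is not an error
    label = f"dns_{record_type.lower()}_record"   # relationship type emitted for the graph
    for record in answer:
        print(domain, label, str(record))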
Args: - ip: IP address to investigate + ip: IP address to investigate (IPv4 or IPv6) Returns: - List of relationships discovered from reverse DNS + ProviderResult containing discovered relationships and attributes """ if not _is_valid_ip(ip): - return [] + return ProviderResult() - relationships = [] + result = ProviderResult() + ip_version = get_ip_version(ip) try: - # Perform reverse DNS lookup + # Perform reverse DNS lookup (works for both IPv4 and IPv6) self.total_requests += 1 reverse_name = reversename.from_address(ip) response = self.resolver.resolve(reverse_name, 'PTR') self.successful_requests += 1 + ptr_records = [] for ptr_record in response: hostname = str(ptr_record).rstrip('.') if _is_valid_domain(hostname): - raw_data = { - 'query_type': 'PTR', - 'ip_address': ip, - 'hostname': hostname, - 'ttl': response.ttl - } + # Determine appropriate forward relationship type based on IP version + if ip_version == 6: + relationship_type = 'dns_aaaa_record' + record_prefix = 'AAAA' + else: + relationship_type = 'dns_a_record' + record_prefix = 'A' + + # Add the relationship + result.add_relationship( + source_node=ip, + target_node=hostname, + relationship_type='dns_ptr_record', + provider=self.name, + confidence=0.8, + raw_data={ + 'query_type': 'PTR', + 'ip_address': ip, + 'ip_version': ip_version, + 'hostname': hostname, + 'ttl': response.ttl + } + ) - relationships.append(( - ip, - hostname, - 'ptr_record', - 0.8, - raw_data - )) + # Add to PTR records list + ptr_records.append(f"PTR: {hostname}") + # Log the relationship discovery self.log_relationship_discovery( source_node=ip, target_node=hostname, - relationship_type='ptr_record', + relationship_type='dns_ptr_record', confidence_score=0.8, - raw_data=raw_data, - discovery_method="reverse_dns_lookup" + raw_data={ + 'query_type': 'PTR', + 'ip_address': ip, + 'ip_version': ip_version, + 'hostname': hostname, + 'ttl': response.ttl + }, + discovery_method=f"reverse_dns_lookup_ipv{ip_version}" ) + # Add PTR records as separate attribute + if ptr_records: + result.add_attribute( + target_node=ip, + name='ptr_records', # Specific name for PTR records + value=ptr_records, + attr_type='dns_record', + provider=self.name, + confidence=0.8, + metadata={'ttl': response.ttl, 'ip_version': ip_version} + ) + except resolver.NXDOMAIN: self.failed_requests += 1 self.logger.logger.debug(f"Reverse DNS lookup failed for {ip}: NXDOMAIN") @@ -130,22 +168,28 @@ class DNSProvider(BaseProvider): # Re-raise the exception so the scanner can handle the failure raise e - return relationships + return result - def _query_record(self, domain: str, record_type: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + def _query_record(self, domain: str, record_type: str, result: ProviderResult) -> None: """ - Query a specific type of DNS record for the domain. + FIXED: Query DNS records with unique attribute names for each record type. + Enhanced to better handle IPv6 AAAA records. 
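A standalone dnspython sketch of the reverse lookup path; 8.8.8.8 is only an illustrative address, and the comments note how the provider maps the answer onto the graph:

from dns import resolver, reversename

ip = "8.8.8.8"                                   # illustrative address
reverse_name = reversename.from_address(ip)      # x.x.x.x.in-addr.arpa (or ip6.arpa for IPv6)
try:
    answer = resolver.resolve(reverse_name, "PTR")
except (resolver.NXDOMAIN, resolver.NoAnswer):
    answer = []

for ptr in answer:
    hostname = str(ptr).rstrip(".")
    # the provider records this as an ip -> hostname 'dns_ptr_record' relationship
    # and collects "PTR: <hostname>" entries into a 'ptr_records' attribute on the IP node
    print(ip, "dns_ptr_record", hostname)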
""" - relationships = [] try: self.total_requests += 1 response = self.resolver.resolve(domain, record_type) self.successful_requests += 1 + dns_records = [] + for record in response: target = "" if record_type in ['A', 'AAAA']: target = str(record) + # Validate that the IP address is properly formed + if not _is_valid_ip(target): + self.logger.logger.debug(f"Invalid IP address in {record_type} record: {target}") + continue elif record_type in ['CNAME', 'NS', 'PTR']: target = str(record.target).rstrip('.') elif record_type == 'MX': @@ -153,46 +197,90 @@ class DNSProvider(BaseProvider): elif record_type == 'SOA': target = str(record.mname).rstrip('.') elif record_type in ['TXT']: - # TXT records are treated as metadata, not relationships. + # Keep raw TXT record value + txt_value = str(record).strip('"') + dns_records.append(txt_value) # Just the value for TXT continue elif record_type == 'SRV': target = str(record.target).rstrip('.') elif record_type == 'CAA': - target = f"{record.flags} {record.tag.decode('utf-8')} \"{record.value.decode('utf-8')}\"" + # Keep raw CAA record format + caa_value = f"{record.flags} {record.tag.decode('utf-8')} \"{record.value.decode('utf-8')}\"" + dns_records.append(caa_value) # Just the value for CAA + continue else: target = str(record) if target: + # Determine IP version for metadata if this is an IP record + ip_version = None + if record_type in ['A', 'AAAA'] and _is_valid_ip(target): + ip_version = get_ip_version(target) + raw_data = { 'query_type': record_type, 'domain': domain, 'value': target, 'ttl': response.ttl } - relationship_type = f"{record_type.lower()}_record" - confidence = 0.8 # Default confidence for DNS records + + if ip_version: + raw_data['ip_version'] = ip_version + + relationship_type = f"dns_{record_type.lower()}_record" + confidence = 0.8 - relationships.append(( - domain, - target, - relationship_type, - confidence, - raw_data - )) + # Add relationship + result.add_relationship( + source_node=domain, + target_node=target, + relationship_type=relationship_type, + provider=self.name, + confidence=confidence, + raw_data=raw_data + ) + # Add target to records list + dns_records.append(target) + + # Log relationship discovery with IP version info + discovery_method = f"dns_{record_type.lower()}_record" + if ip_version: + discovery_method += f"_ipv{ip_version}" + self.log_relationship_discovery( source_node=domain, target_node=target, relationship_type=relationship_type, confidence_score=confidence, raw_data=raw_data, - discovery_method=f"dns_{record_type.lower()}_record" + discovery_method=discovery_method ) + # FIXED: Create attribute with specific name for each record type + if dns_records: + # Use record type specific attribute name (e.g., 'a_records', 'mx_records', etc.) + attribute_name = f"{record_type.lower()}_records" + + metadata = {'record_type': record_type, 'ttl': response.ttl} + + # Add IP version info for A/AAAA records + if record_type in ['A', 'AAAA'] and dns_records: + first_ip_version = get_ip_version(dns_records[0]) + if first_ip_version: + metadata['ip_version'] = first_ip_version + + result.add_attribute( + target_node=domain, + name=attribute_name, # UNIQUE name for each record type! 
+ value=dns_records, + attr_type='dns_record_list', + provider=self.name, + confidence=0.8, + metadata=metadata + ) + except Exception as e: self.failed_requests += 1 self.logger.logger.debug(f"{record_type} record query failed for {domain}: {e}") - # Re-raise the exception so the scanner can handle it - raise e - - return relationships \ No newline at end of file + raise e \ No newline at end of file diff --git a/providers/shodan_provider.py b/providers/shodan_provider.py index 30c48f5..f21c2dc 100644 --- a/providers/shodan_provider.py +++ b/providers/shodan_provider.py @@ -1,20 +1,20 @@ # dnsrecon/providers/shodan_provider.py import json -import os from pathlib import Path -from typing import List, Dict, Any, Tuple +from typing import Dict, Any from datetime import datetime, timezone import requests from .base_provider import BaseProvider -from utils.helpers import _is_valid_ip, _is_valid_domain +from core.provider_result import ProviderResult +from utils.helpers import _is_valid_ip, _is_valid_domain, get_ip_version, normalize_ip class ShodanProvider(BaseProvider): """ Provider for querying Shodan API for IP address information. - Now uses session-specific API keys, is limited to IP-only queries, and includes caching. + Now returns standardized ProviderResult objects with caching support for IPv4 and IPv6. """ def __init__(self, name=None, session_config=None): @@ -53,8 +53,19 @@ class ShodanProvider(BaseProvider): return {'domains': False, 'ips': True} def _get_cache_file_path(self, ip: str) -> Path: - """Generate cache file path for an IP address.""" - safe_ip = ip.replace('.', '_').replace(':', '_') + """ + Generate cache file path for an IP address (IPv4 or IPv6). + IPv6 addresses contain colons which are replaced with underscores for filesystem safety. + """ + # Normalize the IP address first to ensure consistent caching + normalized_ip = normalize_ip(ip) + if not normalized_ip: + # Fallback for invalid IPs + safe_ip = ip.replace('.', '_').replace(':', '_') + else: + # Replace problematic characters for both IPv4 and IPv6 + safe_ip = normalized_ip.replace('.', '_').replace(':', '_') + return self.cache_dir / f"{safe_ip}.json" def _get_cache_status(self, cache_file_path: Path) -> str: @@ -85,115 +96,254 @@ class ShodanProvider(BaseProvider): except (json.JSONDecodeError, ValueError, KeyError): return "stale" - def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + def query_domain(self, domain: str) -> ProviderResult: """ Domain queries are no longer supported for the Shodan provider. + + Args: + domain: Domain to investigate + + Returns: + Empty ProviderResult """ - return [] + return ProviderResult() - def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + def query_ip(self, ip: str) -> ProviderResult: """ - Query Shodan for information about an IP address, with caching of processed relationships. + Query Shodan for information about an IP address (IPv4 or IPv6), with caching of processed data. 
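A standalone mirror of the cache-file naming for IPv4 and IPv6 targets; the cache/shodan directory and the use of ipaddress in place of the normalize_ip helper are assumptions made for the sketch:

import ipaddress
from pathlib import Path

CACHE_DIR = Path("cache") / "shodan"   # assumption: follows the same layout as the crtsh cache

def shodan_cache_file(ip: str) -> Path:
    # mirrors _get_cache_file_path: normalize first, then make the address filesystem-safe
    try:
        normalized = str(ipaddress.ip_address(ip))   # stand-in for the normalize_ip() helper
    except ValueError:
        normalized = ip
    safe = normalized.replace(".", "_").replace(":", "_")
    return CACHE_DIR / f"{safe}.json"

print(shodan_cache_file("93.184.216.34"))   # cache/shodan/93_184_216_34.json
print(shodan_cache_file("2001:db8::1"))     # cache/shodan/2001_db8__1.json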
+ + Args: + ip: IP address to investigate (IPv4 or IPv6) + + Returns: + ProviderResult containing discovered relationships and attributes """ if not _is_valid_ip(ip) or not self.is_available(): - return [] + return ProviderResult() - cache_file = self._get_cache_file_path(ip) + # Normalize IP address for consistent processing + normalized_ip = normalize_ip(ip) + if not normalized_ip: + return ProviderResult() + + cache_file = self._get_cache_file_path(normalized_ip) cache_status = self._get_cache_status(cache_file) - relationships = [] + result = ProviderResult() try: if cache_status == "fresh": - relationships = self._load_from_cache(cache_file) - self.logger.logger.info(f"Using cached Shodan relationships for {ip}") - else: # "stale" or "not_found" - url = f"{self.base_url}/shodan/host/{ip}" + result = self._load_from_cache(cache_file) + self.logger.logger.info(f"Using cached Shodan data for {normalized_ip}") + else: # "stale" or "not_found" + url = f"{self.base_url}/shodan/host/{normalized_ip}" params = {'key': self.api_key} - response = self.make_request(url, method="GET", params=params, target_indicator=ip) + response = self.make_request(url, method="GET", params=params, target_indicator=normalized_ip) if response and response.status_code == 200: data = response.json() - # Process the data into relationships BEFORE caching - relationships = self._process_shodan_data(ip, data) - self._save_to_cache(cache_file, relationships) # Save the processed relationships + # Process the data into ProviderResult BEFORE caching + result = self._process_shodan_data(normalized_ip, data) + self._save_to_cache(cache_file, result, data) # Save both result and raw data + elif response and response.status_code == 404: + # Handle 404 "No information available" as successful empty result + try: + error_data = response.json() + if "No information available" in error_data.get('error', ''): + # This is a successful query - Shodan just has no data + self.logger.logger.debug(f"Shodan has no information for {normalized_ip}") + result = ProviderResult() # Empty but successful result + # Cache the empty result to avoid repeated queries + self._save_to_cache(cache_file, result, {'error': 'No information available'}) + else: + # Some other 404 error - treat as failure + raise requests.exceptions.RequestException(f"Shodan API returned 404: {error_data}") + except (ValueError, KeyError): + # Could not parse JSON response - treat as failure + raise requests.exceptions.RequestException(f"Shodan API returned 404 with unparseable response") elif cache_status == "stale": # If API fails on a stale cache, use the old data - relationships = self._load_from_cache(cache_file) + result = self._load_from_cache(cache_file) + else: + # Other HTTP error codes should be treated as failures + status_code = response.status_code if response else "No response" + raise requests.exceptions.RequestException(f"Shodan API returned HTTP {status_code}") except requests.exceptions.RequestException as e: - self.logger.logger.error(f"Shodan API query failed for {ip}: {e}") + self.logger.logger.info(f"Shodan API query returned no info for {normalized_ip}: {e}") if cache_status == "stale": - relationships = self._load_from_cache(cache_file) + result = self._load_from_cache(cache_file) + else: + # Re-raise for retry scheduling - but only for actual failures + raise e - return relationships + return result - def _load_from_cache(self, cache_file_path: Path) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: - """Load processed Shodan relationships 
from a cache file.""" + def _load_from_cache(self, cache_file_path: Path) -> ProviderResult: + """Load processed Shodan data from a cache file.""" try: with open(cache_file_path, 'r') as f: cache_content = json.load(f) - # The entire file content is the list of relationships - return cache_content.get("relationships", []) + + result = ProviderResult() + + # Reconstruct relationships + for rel_data in cache_content.get("relationships", []): + result.add_relationship( + source_node=rel_data["source_node"], + target_node=rel_data["target_node"], + relationship_type=rel_data["relationship_type"], + provider=rel_data["provider"], + confidence=rel_data["confidence"], + raw_data=rel_data.get("raw_data", {}) + ) + + # Reconstruct attributes + for attr_data in cache_content.get("attributes", []): + result.add_attribute( + target_node=attr_data["target_node"], + name=attr_data["name"], + value=attr_data["value"], + attr_type=attr_data["type"], + provider=attr_data["provider"], + confidence=attr_data["confidence"], + metadata=attr_data.get("metadata", {}) + ) + + return result + except (json.JSONDecodeError, FileNotFoundError, KeyError): - return [] + return ProviderResult() - def _save_to_cache(self, cache_file_path: Path, relationships: List[Tuple[str, str, str, float, Dict[str, Any]]]) -> None: - """Save processed Shodan relationships to a cache file.""" + def _save_to_cache(self, cache_file_path: Path, result: ProviderResult, raw_data: Dict[str, Any]) -> None: + """Save processed Shodan data to a cache file.""" try: cache_data = { "last_upstream_query": datetime.now(timezone.utc).isoformat(), - "relationships": relationships + "raw_data": raw_data, # Preserve original for forensic purposes + "relationships": [ + { + "source_node": rel.source_node, + "target_node": rel.target_node, + "relationship_type": rel.relationship_type, + "confidence": rel.confidence, + "provider": rel.provider, + "raw_data": rel.raw_data + } for rel in result.relationships + ], + "attributes": [ + { + "target_node": attr.target_node, + "name": attr.name, + "value": attr.value, + "type": attr.type, + "provider": attr.provider, + "confidence": attr.confidence, + "metadata": attr.metadata + } for attr in result.attributes + ] } with open(cache_file_path, 'w') as f: - json.dump(cache_data, f, separators=(',', ':')) + json.dump(cache_data, f, separators=(',', ':'), default=str) except Exception as e: self.logger.logger.warning(f"Failed to save Shodan cache for {cache_file_path.name}: {e}") - def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> ProviderResult: """ - Process Shodan data to extract relationships. + VERIFIED: Process Shodan data creating ISP nodes with ASN attributes and proper relationships. + Enhanced to include IP version information for IPv6 addresses. 
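A sketch of the graph shape this method produces, built with ProviderResult directly from a toy Shodan document; the IP, organisation, and ASN values are illustrative, and "shodan" stands in for self.name:

from core.provider_result import ProviderResult

# toy Shodan host document; field names follow the API response handled above
shodan_doc = {"org": "ExampleNet", "asn": "AS64500", "hostnames": ["host.example.com"], "ports": [443]}

result = ProviderResult()
ip = "93.184.216.34"

# IP -> ISP organisation, with the ASN attached to the ISP node
result.add_relationship(source_node=ip, target_node=shodan_doc["org"],
                        relationship_type="shodan_isp", provider="shodan",
                        confidence=0.9, raw_data={"asn": shodan_doc["asn"]})
result.add_attribute(target_node=shodan_doc["org"], name="asn", value=shodan_doc["asn"],
                     attr_type="isp_info", provider="shodan", confidence=0.9, metadata={})

# IP -> hostname for each reverse-DNS name, and one attribute per open port
for hostname in shodan_doc["hostnames"]:
    result.add_relationship(source_node=ip, target_node=hostname,
                            relationship_type="shodan_a_record", provider="shodan",
                            confidence=0.8, raw_data=shodan_doc)
for port in shodan_doc["ports"]:
    result.add_attribute(target_node=ip, name="shodan_open_port", value=port,
                         attr_type="shodan_network_info", provider="shodan",
                         confidence=0.9, metadata={})

print(result.get_relationship_count())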
""" - relationships = [] + result = ProviderResult() + + # Determine IP version for metadata + ip_version = get_ip_version(ip) - # Extract hostname relationships - hostnames = data.get('hostnames', []) - for hostname in hostnames: - if _is_valid_domain(hostname): - relationships.append(( - ip, - hostname, - 'a_record', - 0.8, - data - )) - self.log_relationship_discovery( - source_node=ip, - target_node=hostname, - relationship_type='a_record', - confidence_score=0.8, - raw_data=data, - discovery_method="shodan_host_lookup" - ) + # VERIFIED: Extract ISP information and create proper ISP node with ASN + isp_name = data.get('org') + asn_value = data.get('asn') - # Extract ASN relationship - asn = data.get('asn') - if asn: - asn_name = f"AS{asn[2:]}" if isinstance(asn, str) and asn.startswith('AS') else f"AS{asn}" - relationships.append(( - ip, - asn_name, - 'asn_membership', - 0.7, - data - )) - self.log_relationship_discovery( + if isp_name and asn_value: + # Create relationship from IP to ISP + result.add_relationship( source_node=ip, - target_node=asn_name, - relationship_type='asn_membership', - confidence_score=0.7, - raw_data=data, - discovery_method="shodan_asn_lookup" + target_node=isp_name, + relationship_type='shodan_isp', + provider=self.name, + confidence=0.9, + raw_data={'asn': asn_value, 'shodan_org': isp_name, 'ip_version': ip_version} ) - return relationships \ No newline at end of file + # Add ASN as attribute to the ISP node + result.add_attribute( + target_node=isp_name, + name='asn', + value=asn_value, + attr_type='isp_info', + provider=self.name, + confidence=0.9, + metadata={'description': 'Autonomous System Number from Shodan', 'ip_version': ip_version} + ) + + # Also add organization name as attribute to ISP node for completeness + result.add_attribute( + target_node=isp_name, + name='organization_name', + value=isp_name, + attr_type='isp_info', + provider=self.name, + confidence=0.9, + metadata={'description': 'Organization name from Shodan', 'ip_version': ip_version} + ) + + # Process hostnames (reverse DNS) + for key, value in data.items(): + if key == 'hostnames': + for hostname in value: + if _is_valid_domain(hostname): + # Use appropriate relationship type based on IP version + if ip_version == 6: + relationship_type = 'shodan_aaaa_record' + else: + relationship_type = 'shodan_a_record' + + result.add_relationship( + source_node=ip, + target_node=hostname, + relationship_type=relationship_type, + provider=self.name, + confidence=0.8, + raw_data={**data, 'ip_version': ip_version} + ) + self.log_relationship_discovery( + source_node=ip, + target_node=hostname, + relationship_type=relationship_type, + confidence_score=0.8, + raw_data={**data, 'ip_version': ip_version}, + discovery_method=f"shodan_host_lookup_ipv{ip_version}" + ) + elif key == 'ports': + # Add open ports as attributes to the IP + for port in value: + result.add_attribute( + target_node=ip, + name='shodan_open_port', + value=port, + attr_type='shodan_network_info', + provider=self.name, + confidence=0.9, + metadata={'ip_version': ip_version} + ) + elif isinstance(value, (str, int, float, bool)) and value is not None: + # Add other Shodan fields as IP attributes (keep raw field names) + result.add_attribute( + target_node=ip, + name=key, # Raw field name from Shodan API + value=value, + attr_type='shodan_info', + provider=self.name, + confidence=0.9, + metadata={'ip_version': ip_version} + ) + + return result \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 
0e37daa..d46c0bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -Flask>=2.3.3 -networkx>=3.1 -requests>=2.31.0 -python-dateutil>=2.8.2 -Werkzeug>=2.3.7 -urllib3>=2.0.0 -dnspython>=2.4.2 +Flask +networkx +requests +python-dateutil +Werkzeug +urllib3 +dnspython gunicorn redis python-dotenv \ No newline at end of file diff --git a/static/css/main.css b/static/css/main.css index 0177c4b..1d68ffd 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -1,6 +1,6 @@ -/* DNSRecon - Enhanced Tactical/Cybersecurity Theme */ +/* DNSRecon - Optimized Compact Theme */ -/* Reset and Base Styles */ +/* Reset and Base */ * { margin: 0; padding: 0; @@ -8,28 +8,27 @@ } body { - font-family: 'Roboto Mono', 'SF Mono', 'Monaco', 'Inconsolata', 'Fira Code', monospace; + font-family: 'Roboto Mono', monospace; background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%); color: #e0e0e0; - line-height: 1.6; - letter-spacing: 0.5px; + line-height: 1.4; + letter-spacing: 0.3px; + font-size: 0.85rem; min-height: 100vh; overflow-x: hidden; } -/* Container and Layout */ .container { min-height: 100vh; display: flex; flex-direction: column; } -/* === ENHANCED HEADER === */ +/* Header */ .header { background: linear-gradient(135deg, #0a0a0a 0%, #1a1a1a 100%); - border-bottom: 2px solid #333; - padding: 1.5rem 2rem; - box-shadow: 0 4px 20px rgba(0,0,0,0.5); + border-bottom: 1px solid #333; + padding: 0.75rem 1rem; position: sticky; top: 0; z-index: 100; @@ -39,64 +38,44 @@ body { display: flex; justify-content: space-between; align-items: center; - max-width: 1400px; + max-width: 2400px; margin: 0 auto; } .logo { display: flex; align-items: center; - font-family: 'Special Elite', 'Courier New', monospace; - font-size: 1.6rem; + font-family: 'Special Elite', monospace; + font-size: 1.3rem; font-weight: 700; - gap: 0.5rem; + gap: 0.3rem; } .logo-icon { color: #00ff41; - margin-right: 0.5rem; - text-shadow: 0 0 15px rgba(0, 255, 65, 0.6); - animation: logoGlow 3s ease-in-out infinite alternate; -} - -@keyframes logoGlow { - from { text-shadow: 0 0 15px rgba(0, 255, 65, 0.6); } - to { text-shadow: 0 0 25px rgba(0, 255, 65, 0.8); } + text-shadow: 0 0 10px rgba(0, 255, 65, 0.5); } .logo-text { color: #c7c7c7; - text-shadow: 0 0 5px rgba(199, 199, 199, 0.3); } .status-indicator { display: flex; align-items: center; - gap: 0.75rem; - padding: 0.5rem 1rem; + gap: 0.5rem; + padding: 0.3rem 0.75rem; background: rgba(42, 42, 42, 0.6); - border-radius: 25px; + border-radius: 15px; border: 1px solid #333; } -.status-indicator.scanning { - animation: pulse 1.5s infinite; -} - -.status-indicator.completed { - background-color: #00ff41; -} - -.status-indicator.error { - background-color: #ff6b6b; -} - .status-dot { - width: 10px; - height: 10px; + width: 8px; + height: 8px; border-radius: 50%; background-color: #00ff41; - box-shadow: 0 0 10px rgba(0, 255, 65, 0.7); + box-shadow: 0 0 8px rgba(0, 255, 65, 0.6); animation: statusPulse 2s infinite; } @@ -105,140 +84,147 @@ body { 50% { opacity: 0.7; transform: scale(1.1); } } -@keyframes pulse { - 0%, 100% { opacity: 1; } - 50% { opacity: 0.5; } +.status-indicator.scanning { + animation: pulse 1.5s infinite; } .status-text { - font-size: 0.9rem; + font-size: 0.8rem; color: #00ff41; } -/* Main Content */ +/* Main Layout - Graph-Centric */ .main-content { flex: 1; - padding: 1.25rem; /* Reduced from 2rem */ - max-width: 1400px; + padding: 0.75rem; + max-width: 2400px; margin: 0 auto; width: 100%; display: grid; - grid-template-columns: 1fr 1fr; - 
grid-template-rows: auto auto 1fr auto; - gap: 1rem; /* Reduced from 1.5rem */ + grid-template-columns: 300px 1fr; + grid-template-rows: auto 1fr auto; + gap: 0.75rem; grid-template-areas: "control status" "visualization visualization" - "visualization visualization" "providers providers"; } -/* === ENHANCED PANELS === */ +/* Unified Panel Styles */ section { background: linear-gradient(135deg, #2a2a2a 0%, #1e1e1e 100%); border: 1px solid #444; - border-radius: 8px; - box-shadow: 0 8px 32px rgba(0,0,0,0.3); + border-radius: 6px; overflow: hidden; - transition: all 0.3s ease; -} - -section:hover { - border-color: #555; - box-shadow: 0 12px 40px rgba(0,0,0,0.4); + box-shadow: 0 4px 15px rgba(0,0,0,0.2); } .panel-header { background: linear-gradient(90deg, #333 0%, #2a2a2a 100%); - padding: 0.75rem 1rem; /* Reduced from 1.25rem 1.5rem */ - border-bottom: 2px solid #444; - position: relative; - overflow: hidden; - display: flex; - justify-content: space-between; - align-items: center; -} - -/* Remove the generic top line */ -.panel-header::before { - display: none; + padding: 0.5rem 0.75rem; + border-bottom: 1px solid #444; } .panel-header h2 { - font-size: 1.2rem; + font-size: 1rem; color: #00ff41; - text-shadow: 0 0 10px rgba(0, 255, 65, 0.4); font-weight: 600; margin: 0; - letter-spacing: 1px; } -/* Control Panel */ +/* Controls */ .control-panel { grid-area: control; } .form-container { - padding: 1rem; + padding: 0.75rem; } .input-group { - margin-bottom: 1.25rem; + margin-bottom: 0.75rem; } .input-group label { display: block; - margin-bottom: 0.75rem; + margin-bottom: 0.4rem; color: #e0e0e0; - font-size: 0.95rem; + font-size: 0.8rem; font-weight: 500; - letter-spacing: 0.5px; } input[type="text"], select { width: 100%; - padding: 1rem; - background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%); - border: 2px solid #444; - border-radius: 6px; + padding: 0.6rem; + background: #1a1a1a; + border: 1px solid #444; + border-radius: 4px; color: #e0e0e0; font-family: 'Roboto Mono', monospace; - font-size: 0.95rem; - transition: all 0.3s ease; - position: relative; + font-size: 0.8rem; + transition: border-color 0.3s ease; } input[type="text"]:focus, select:focus { outline: none; border-color: #00ff41; - box-shadow: 0 0 15px rgba(0, 255, 65, 0.3); - background: linear-gradient(135deg, #1f1f1f 0%, #141414 100%); + box-shadow: 0 0 8px rgba(0, 255, 65, 0.3); } .button-group { display: flex; flex-direction: column; - gap: 0.75rem; + gap: 0.5rem; } -/* === ENHANCED BUTTONS === */ +/* Unified Button Styles */ .btn { display: flex; align-items: center; justify-content: center; - gap: 0.75rem; - padding: 1rem 1.5rem; + gap: 0.5rem; + padding: 0.6rem 1rem; font-family: 'Roboto Mono', monospace; - font-size: 0.95rem; + font-size: 0.8rem; font-weight: 500; border: none; - border-radius: 6px; + border-radius: 4px; cursor: pointer; transition: all 0.3s ease; text-transform: uppercase; - letter-spacing: 0.5px; - position: relative; - overflow: hidden; +} + +.btn-primary { + background: linear-gradient(135deg, #2c5c34 0%, #1e4025 100%); + color: #e0e0e0; + border: 1px solid #3a7a48; +} + +.btn-primary:hover:not(:disabled) { + background: linear-gradient(135deg, #3d7d4e 0%, #2a5436 100%); + transform: translateY(-1px); +} + +.btn-secondary { + background: linear-gradient(135deg, #4a4a4a 0%, #333 100%); + color: #e0e0e0; + border: 1px solid #555; +} + +.btn-secondary:hover:not(:disabled) { + background: linear-gradient(135deg, #5a5a5a 0%, #444 100%); + transform: translateY(-1px); +} + +.btn:disabled { + 
opacity: 0.5; + cursor: not-allowed; + transform: none !important; +} + +.btn.loading { + opacity: 0.7; + pointer-events: none; } .btn::before { @@ -256,61 +242,9 @@ input[type="text"]:focus, select:focus { left: 100%; } -.btn-primary { - background: linear-gradient(135deg, #2c5c34 0%, #1e4025 100%); - color: #e0e0e0; - border: 1px solid #3a7a48; -} - -.btn-primary:hover:not(:disabled) { - background: linear-gradient(135deg, #3d7d4e 0%, #2a5436 100%); - box-shadow: 0 6px 20px rgba(0, 255, 65, 0.3); - transform: translateY(-2px); -} - -.btn-secondary { - background: linear-gradient(135deg, #4a4a4a 0%, #333 100%); - color: #e0e0e0; - border: 1px solid #555; -} - -.btn-secondary:hover:not(:disabled) { - background: linear-gradient(135deg, #5a5a5a 0%, #444 100%); - box-shadow: 0 6px 20px rgba(255, 255, 255, 0.1); - transform: translateY(-2px); -} - -.btn-secondary:active { - background-color: #6a4f2a; -} - -.btn:disabled { - opacity: 0.4; - cursor: not-allowed; - transform: none !important; - box-shadow: none !important; -} - .btn-icon { color: #00ff41; font-weight: 700; - text-shadow: 0 0 5px rgba(0, 255, 65, 0.5); -} - -.btn-icon-small { - background: transparent; - border: 1px solid #555; - color: #c7c7c7; - padding: 0.25rem 0.5rem; - font-family: 'Roboto Mono', monospace; - font-size: 0.8rem; - cursor: pointer; - transition: all 0.3s ease; -} - -.btn-icon-small:hover { - border-color: #00ff41; - color: #00ff41; } /* Status Panel */ @@ -319,76 +253,59 @@ input[type="text"]:focus, select:focus { } .status-content { - padding: 1rem; + padding: 0.75rem; } .status-row { display: flex; justify-content: space-between; align-items: center; - margin-bottom: 1rem; - padding: 0.75rem; + margin-bottom: 0.6rem; + padding: 0.4rem; background: rgba(255, 255, 255, 0.02); - border-radius: 4px; - border-left: 3px solid #333; - transition: all 0.3s ease; -} - -.status-row:hover { - background: rgba(255, 255, 255, 0.05); - border-left-color: #00ff41; + border-radius: 3px; + border-left: 2px solid #333; } .status-label { color: #999; - font-size: 0.9rem; + font-size: 0.75rem; font-weight: 500; } .status-value { color: #00ff41; font-weight: 600; - text-shadow: 0 0 5px rgba(0, 255, 65, 0.3); - font-family: 'Roboto Mono', monospace; + font-size: 0.8rem; } .progress-container { - padding: 1rem; + padding: 0.75rem; background: rgba(0, 0, 0, 0.2); } .progress-info { display: flex; justify-content: space-between; - align-items: center; - font-size: 0.9rem; + font-size: 0.75rem; color: #999; - margin-bottom: 0.75rem; -} - -#progress-compact { - color: #00ff41; - font-weight: 600; - font-family: 'Roboto Mono', monospace; + margin-bottom: 0.5rem; } .progress-bar { - height: 12px; - background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%); + height: 8px; + background: #1a1a1a; border: 1px solid #444; - border-radius: 6px; + border-radius: 4px; overflow: hidden; - position: relative; - box-shadow: inset 0 2px 4px rgba(0,0,0,0.5); } .progress-fill { height: 100%; - background: linear-gradient(90deg, #00ff41 0%, #00aa2e 50%, #00ff41 100%); + background: linear-gradient(90deg, #00ff41 0%, #00aa2e 100%); width: 0%; transition: width 0.5s ease; position: relative; - border-radius: 4px; } .progress-fill::after { @@ -405,170 +322,37 @@ input[type="text"]:focus, select:focus { animation: progressShimmer 2s infinite; } +.progress-bar.scanning { + animation: progressGlow 2s ease-in-out infinite alternate; +} + @keyframes progressShimmer { 0% { transform: translateX(-100%); } 100% { transform: translateX(100%); } } 
-.progress-bar.scanning { - animation: progressGlow 2s ease-in-out infinite alternate; -} - @keyframes progressGlow { from { box-shadow: inset 0 2px 4px rgba(0,0,0,0.5), 0 0 5px rgba(0, 255, 65, 0.3); } to { box-shadow: inset 0 2px 4px rgba(0,0,0,0.5), 0 0 15px rgba(0, 255, 65, 0.6); } } -/* Visualization Panel */ +.placeholder-subtext { + font-size: 0.75rem; + color: #666; + line-height: 1.3; +} + +/* Graph Container - Main Focus */ .visualization-panel { grid-area: visualization; - min-height: 500px; + min-height: 600px; } .graph-container { - height: 800px; + height: 600px; position: relative; background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%); - border-top: 2px solid #444; - transition: height 0.3s ease; - overflow: hidden; -} - -.graph-controls { - position: absolute; - top: 10px; - right: 10px; - z-index: 10; - display: flex; - gap: 0.5rem; -} - -.graph-control-btn { - background: rgba(42, 42, 42, 0.9); - border: 1px solid #555; - color: #c7c7c7; - padding: 0.5rem; - font-family: 'Roboto Mono', monospace; - font-size: 0.8rem; - cursor: pointer; - transition: all 0.3s ease; -} - -.graph-control-btn:hover { - border-color: #00ff41; - color: #00ff41; - background: rgba(42, 42, 42, 1); -} - -.graph-filter-panel { - position: absolute; - bottom: 10px; - left: 10px; - z-index: 10; - background: rgba(42, 42, 42, 0.9); - border: 1px solid #555; - color: #c7c7c7; - padding: 0.75rem; - font-family: 'Roboto Mono', monospace; - font-size: 0.8rem; - /*max-height: 40%; - overflow-y: auto;*/ - display: flex; - gap: 1.5rem; -} - -.filter-column { - display: flex; - flex-direction: column; -} - -.filter-column h4 { - color: #00ff41; - margin-bottom: 0.5rem; - font-size: 0.9rem; - border-bottom: 1px solid #444; - padding-bottom: 0.25rem; -} - -.filter-column .checkbox-group { - display: flex; - flex-direction: column; - gap: 0.25rem; -} - -.filter-column label { - display: flex; - align-items: center; - gap: 0.5rem; - cursor: pointer; - transition: color 0.2s; -} - -.filter-column label:hover { - color: #00ff41; -} - -.filter-column input[type="checkbox"] { - appearance: none; - width: 12px; - height: 12px; - border: 1px solid #555; - background-color: #1a1a1a; - cursor: pointer; - position: relative; -} - -.filter-column input[type="checkbox"]:checked { - background-color: #00ff41; - border-color: #00ff41; -} - -.filter-column input[type="checkbox"]:checked::after { - content: '✓'; - position: absolute; - top: -3px; - left: 1px; - color: #1a1a1a; - font-size: 12px; -} - -.graph-context-menu { - position: absolute; - z-index: 1000; - background-color: #2a2a2a; - border: 1px solid #444; - box-shadow: 0 2px 5px rgba(0,0,0,0.5); - display: none; - font-family: 'Roboto Mono', monospace; - font-size: 0.9rem; - color: #c7c7c7; -} - -.graph-context-menu ul { - list-style: none; - padding: 0; - margin: 0; -} - -.graph-context-menu ul li { - padding: 0.75rem 1rem; - cursor: pointer; - transition: background-color 0.2s ease; -} - -.graph-context-menu ul li:hover { - background-color: #3a3a3a; - color: #00ff41; -} - -.graph-context-menu ul li[disabled] { - color: #666; - cursor: not-allowed; -} - -.graph-context-menu ul li[disabled]:hover { - background-color: #2a2a2a; - color: #666; + border-top: 1px solid #444; } .graph-placeholder { @@ -578,7 +362,6 @@ input[type="text"]:focus, select:focus { height: 100%; color: #666; text-align: center; - background: radial-gradient(circle at center, rgba(0, 255, 65, 0.05) 0%, transparent 70%); } .placeholder-content { @@ -586,88 +369,142 @@ 
input[type="text"]:focus, select:focus { } .placeholder-icon { - font-size: 4rem; - margin-bottom: 1rem; + font-size: 3rem; + margin-bottom: 0.75rem; color: #333; - text-shadow: 0 0 20px rgba(0, 255, 65, 0.2); } .placeholder-text { - font-size: 1.1rem; + font-size: 1rem; margin-bottom: 0.5rem; color: #999; } -.placeholder-subtext { - font-size: 0.9rem; - color: #666; -} - -.legend { - background: linear-gradient(135deg, #222 0%, #1a1a1a 100%); - border-top: 2px solid #444; - padding: 0.75rem 1rem; /* Reduced from 1.25rem 1.75rem */ +/* Graph Controls */ +.graph-controls { + position: absolute; + top: 8px; + right: 8px; + z-index: 10; display: flex; - flex-wrap: wrap; - gap: 1rem; /* Reduced from 1.5rem */ - align-items: center; + gap: 0.3rem; } -.legend-section { +.graph-control-btn, .btn-icon-small { + background: rgba(42, 42, 42, 0.9); + border: 1px solid #555; + color: #c7c7c7; + padding: 0.3rem 0.5rem; + font-family: 'Roboto Mono', monospace; + font-size: 0.7rem; + cursor: pointer; + transition: all 0.3s ease; +} + +.graph-control-btn:hover, .btn-icon-small:hover { + border-color: #00ff41; + color: #00ff41; +} + +.graph-filter-panel { + position: absolute; + bottom: 8px; + left: 8px; + z-index: 10; + background: rgba(42, 42, 42, 0.9); + border: 1px solid #555; + padding: 0.5rem; + font-size: 0.7rem; + display: flex; + gap: 1rem; +} + +.filter-column h4 { + color: #00ff41; + margin-bottom: 0.3rem; + font-size: 0.75rem; + border-bottom: 1px solid #444; + padding-bottom: 0.2rem; +} + +.filter-column .checkbox-group { display: flex; flex-direction: column; - gap: 0.5rem; + gap: 0.2rem; } -.legend-title { +.filter-column label { + display: flex; + align-items: center; + gap: 0.3rem; + cursor: pointer; font-size: 0.7rem; - color: #00ff41; - text-transform: uppercase; - font-weight: 500; - margin-bottom: 0.25rem; +} + +.filter-column input[type="checkbox"] { + appearance: none; + width: 10px; + height: 10px; + border: 1px solid #555; + background: #1a1a1a; + cursor: pointer; +} + +.filter-column input[type="checkbox"]:checked { + background: #00ff41; + border-color: #00ff41; +} + +.filter-column input[type="checkbox"]:checked::after { + content: '✓'; + position: absolute; + color: #1a1a1a; + font-size: 8px; + line-height: 10px; + text-align: center; + width: 10px; +} + +/* Legend */ +.legend { + background: #222; + border-top: 1px solid #444; + padding: 0.5rem 0.75rem; + display: flex; + flex-wrap: wrap; + gap: 0.75rem; + align-items: center; } .legend-item { display: flex; align-items: center; - gap: 0.75rem; - font-size: 0.85rem; + gap: 0.4rem; + font-size: 0.7rem; color: #ccc; - padding: 0.5rem; - border-radius: 4px; + padding: 0.3rem; + border-radius: 3px; background: rgba(255, 255, 255, 0.02); - transition: all 0.3s ease; -} - -.legend-item:hover { - background: rgba(255, 255, 255, 0.05); } .legend-color { - width: 14px; - height: 14px; + width: 10px; + height: 10px; border-radius: 50%; - border: 2px solid #444; - box-shadow: 0 0 5px rgba(0,0,0,0.5); + border: 1px solid #444; } .legend-edge { - width: 20px; + width: 16px; height: 2px; } .legend-edge.high-confidence { - background-color: #00ff41; - box-shadow: 0 0 3px rgba(0, 255, 65, 0.5); + background: #00ff41; } .legend-edge.medium-confidence { - background-color: #ff9900; - box-shadow: 0 0 3px rgba(255, 153, 0, 0.5); -} - -.legend-edge.low-confidence { - background-color: #666666; + background: #ff9900; } /* Provider Panel */ @@ -676,126 +513,90 @@ input[type="text"]:focus, select:focus { } .provider-list { - padding: 1rem; /* Reduced 
from 1.75rem */ + padding: 0.75rem; display: grid; - grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); - gap: 1rem; /* Reduced from 1.25rem */ + grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); + gap: 0.75rem; } .provider-item { background: linear-gradient(135deg, #2a2a2a 0%, #1e1e1e 100%); border: 1px solid #333; - border-radius: 8px; - padding: 1rem; /* Reduced from 1.25rem */ + border-radius: 6px; + padding: 0.75rem; transition: all 0.3s ease; - position: relative; - overflow: hidden; -} - -/* Remove provider item top line */ -.provider-item::before { - display: none; } .provider-item:hover { border-color: #444; - box-shadow: 0 8px 25px rgba(0,0,0,0.3); - transform: translateY(-2px); -} - -.provider-item:hover::before { - background: linear-gradient(90deg, #00ff41 0%, #00aa2e 100%); -} - -.provider-name { - font-weight: 600; - color: #e0e0e0; - font-size: 1.1rem; -} - -.provider-status { - font-size: 0.8rem; - padding: 0.4rem 0.8rem; - border-radius: 4px; - font-weight: 600; - text-transform: uppercase; - letter-spacing: 0.5px; -} - -.provider-status.enabled { - background: linear-gradient(135deg, #2c5c34 0%, #1e4025 100%); - color: #e0e0e0; - border: 1px solid #3a7a48; -} - -.provider-status.disabled { - background: linear-gradient(135deg, #5c2c2c 0%, #402020 100%); - color: #e0e0e0; - border: 1px solid #7a3a3a; -} - -.provider-status.api-key-required { - background: linear-gradient(135deg, #5c4c2c 0%, #403620 100%); - color: #e0e0e0; - border: 1px solid #7a6a3a; -} - -.provider-stats, .provider-task-stats { - font-size: 0.8rem; - color: #999; - display: grid; - grid-template-columns: 1fr 1fr; - gap: 0.75rem; - margin-top: 0.5rem; -} - -.provider-task-stats { - border-top: 1px solid #333; - padding-top: 0.5rem; - margin-top: 0.5rem; -} - -.provider-stat { - display: flex; - flex-direction: column; - gap: 0.25rem; -} - -.provider-stat-label { - color: #999; - font-size: 0.8rem; - font-weight: 500; -} - -.provider-stat-value { - color: #00ff41; - font-weight: 600; - font-family: 'Roboto Mono', monospace; - font-size: 1rem; + transform: translateY(-1px); } .provider-header { display: flex; justify-content: space-between; align-items: center; - margin-bottom: 1rem; + margin-bottom: 0.5rem; } -.node-info-popup { - position: fixed; - background: rgba(42, 42, 42, 0.95); - border: 1px solid #555; - padding: 1rem; - border-radius: 4px; - color: #c7c7c7; - font-family: 'Roboto Mono', monospace; - font-size: 0.8rem; - max-width: 300px; - z-index: 1001; - box-shadow: 0 4px 6px rgba(0,0,0,0.3); +.provider-name { + font-weight: 600; + color: #e0e0e0; + font-size: 0.9rem; } -/* === ENHANCED MODAL STYLING === */ +.provider-status { + font-size: 0.65rem; + padding: 0.2rem 0.4rem; + border-radius: 3px; + font-weight: 600; + text-transform: uppercase; +} + +.provider-status.enabled { + background: #2c5c34; + color: #e0e0e0; + border: 1px solid #3a7a48; +} + +.provider-status.disabled { + background: #5c2c2c; + color: #e0e0e0; + border: 1px solid #7a3a3a; +} + +.provider-status.api-key-required { + background: #5c4c2c; + color: #e0e0e0; + border: 1px solid #7a6a3a; +} + +.provider-stats { + font-size: 0.7rem; + color: #999; + display: grid; + grid-template-columns: 1fr 1fr; + gap: 0.5rem; +} + +.provider-stat { + display: flex; + flex-direction: column; + gap: 0.1rem; +} + +.provider-stat-label { + color: #999; + font-size: 0.65rem; +} + +.provider-stat-value { + color: #00ff41; + font-weight: 600; + font-size: 0.75rem; +} + +/* Modal Styles - Simplified */ .modal { display: none; 
position: fixed; @@ -804,104 +605,45 @@ input[type="text"]:focus, select:focus { top: 0; width: 100%; height: 100%; - overflow: auto; - backdrop-filter: blur(8px); - background-color: rgba(0,0,0,0.7); + background: rgba(0,0,0,0.7); } .modal-content { background: linear-gradient(135deg, #1e1e1e 0%, #252525 100%); - margin: 10% auto; + margin: 5% auto; padding: 0; border: 1px solid #444; - border-radius: 8px; - width: 60%; - max-width: 800px; - max-height: 85vh; + border-radius: 6px; + width: 70%; + max-width: 900px; + max-height: 100vh; overflow: hidden; - box-shadow: 0 20px 60px rgba(0,0,0,0.7); - animation: modalSlideIn 0.3s ease-out; -} - -@keyframes modalSlideIn { - from { - opacity: 0; - transform: translateY(-20px) scale(0.95); - } - to { - opacity: 1; - transform: translateY(0) scale(1); - } -} - -.modal-opening { - animation: modalFadeIn 0.3s ease-out; -} - -.modal-closing { - animation: modalFadeOut 0.2s ease-in; -} - -@keyframes modalFadeIn { - from { - opacity: 0; - transform: scale(0.9); - } - to { - opacity: 1; - transform: scale(1); - } -} - -@keyframes modalFadeOut { - from { - opacity: 1; - transform: scale(1); - } - to { - opacity: 0; - transform: scale(0.95); - } } .modal-header { - background: linear-gradient(90deg, #2a2a2a 0%, #1e1e1e 100%); - border-bottom: 2px solid #444; - padding: 1.5rem 2rem; - position: sticky; - top: 0; - z-index: 10; + background: #2a2a2a; + border-bottom: 1px solid #444; + padding: 0.75rem 1rem; display: flex; justify-content: space-between; align-items: center; } .modal-header h3 { - font-size: 1.3rem; + font-size: 1rem; margin: 0; color: #00ff41; - text-shadow: 0 0 10px rgba(0, 255, 65, 0.3); display: flex; align-items: center; } -.modal-title-icon { - margin-right: 0.5rem; - font-size: 1.2rem; -} - -.modal-title-text { - font-family: 'Special Elite', monospace; -} - .modal-close { background: none; border: none; color: #c7c7c7; - font-size: 24px; + font-size: 18px; cursor: pointer; - padding: 0 10px; - transition: color 0.3s ease; + padding: 0 8px; } .modal-close:hover { @@ -909,33 +651,154 @@ input[type="text"]:focus, select:focus { } .modal-body { - padding: 0; - max-height: calc(85vh - 120px); + max-height: calc(90vh - 80px); overflow-y: auto; - scroll-behavior: smooth; } +.modal-details { + padding: 0.75rem; + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +/* Modal Content Styles */ .modal-section { + border: 1px solid #333; + border-radius: 4px; + background: #2a2a2a; +} + +.section-card { + background: linear-gradient(135deg, #2a2a2a 0%, #1f1f1f 100%); + border: 1px solid #333; + border-radius: 6px; + overflow: hidden; + transition: all 0.3s ease; + margin-bottom: 0.5rem; +} + +.section-card:hover { + border-color: #444; + box-shadow: 0 4px 12px rgba(0,0,0,0.3); +} + +.section-card.collapsed .metadata-content { + display: none; +} + +.section-header { + display: flex; + justify-content: space-between; + align-items: center; + padding: 0.5rem 0.75rem; + background: rgba(0, 255, 65, 0.05); + border-bottom: 1px solid #333; + cursor: pointer; +} + +.section-header h4 { + margin: 0; + font-size: 0.85rem; + color: #00ff41; + display: flex; + align-items: center; + gap: 0.4rem; + pointer-events: none; +} + +.section-icon { + font-size: 0.9rem; +} + +.toggle-section-btn { + background: none; + border: none; + color: #999; + cursor: pointer; + padding: 0.2rem; + transition: color 0.3s ease; + font-size: 0.7rem; + display: flex; + align-items: center; + justify-content: center; +} + +.toggle-section-btn:hover { + color: #00ff41; +} 
+ +.metadata-section { border: 1px solid #333; border-radius: 6px; overflow: hidden; background: #2a2a2a; } -.modal-section details { - border: none; +.metadata-content { + padding: 0.75rem; + background: #1e1e1e; + font-size: 0.75rem; +} + +/* Formatted Object HTML Styles */ +.metadata-content ul { + list-style: none; + padding-left: 1rem; + margin: 0; +} + +.metadata-content li { + margin-bottom: 0.4rem; + padding: 0.2rem 0; + border-bottom: 1px solid rgba(255,255,255,0.05); +} + +.metadata-content li:last-child { + border-bottom: none; + margin-bottom: 0; +} + +.metadata-content details { + margin: 0.3rem 0; +} + +.metadata-content summary { + cursor: pointer; + padding: 0.3rem; + background: rgba(255,255,255,0.02); + border-radius: 3px; + margin-bottom: 0.3rem; + font-weight: 500; +} + +.metadata-content summary:hover { + background: rgba(255,255,255,0.05); +} + +.metadata-content strong { + color: #00ff41; + font-weight: 500; + margin-right: 0.5rem; +} + +.metadata-content .no-data { + color: #666; + font-style: italic; + text-align: center; + padding: 1rem; } .modal-section summary { - padding: 0.75rem 1rem; + padding: 0.5rem 0.75rem; background: rgba(0, 255, 65, 0.05); - border-bottom: 1px solid #333; cursor: pointer; display: flex; justify-content: space-between; align-items: center; font-weight: 500; color: #00ff41; + font-size: 0.8rem; list-style: none; } @@ -946,336 +809,108 @@ input[type="text"]:focus, select:focus { .modal-section summary::after { content: '▼'; transition: transform 0.3s ease; - font-size: 0.8rem; + font-size: 0.7rem; } .modal-section[open] summary::after { transform: rotate(180deg); } +.modal-section .count-badge { + background: #444; + color: #fff; + padding: 0.15rem 0.4rem; + border-radius: 2px; + font-size: 0.65rem; + font-weight: 600; + min-width: 20px; + text-align: center; +} + .modal-section-content { - padding: 0.75rem 1rem; + padding: 0.5rem 0.75rem; background: #1e1e1e; } -/* Custom scrollbar for modal */ -.modal-body::-webkit-scrollbar { - width: 8px; -} - -.modal-body::-webkit-scrollbar-track { - background: #1a1a1a; -} - -.modal-body::-webkit-scrollbar-thumb { - background: #444; - border-radius: 4px; -} - -.modal-body::-webkit-scrollbar-thumb:hover { - background: #555; -} - -/* === ENHANCED NODE DETAILS LAYOUT === */ -.modal-details { - padding: 1rem; /* Reduced from 1.5rem */ - display: flex; - flex-direction: column; - gap: 0.75rem; /* Reduced from 1.5rem */ -} - -/* Node Header */ +/* Node Details */ .node-header { display: flex; align-items: center; - gap: 0.75rem; /* Reduced from 1rem */ - padding: 0.75rem; /* Reduced from 1rem */ - background: linear-gradient(135deg, #2a2a2a 0%, #333 100%); - border-radius: 8px; - border-left: 4px solid #00ff41; + gap: 0.5rem; + padding: 0.5rem; + background: #333; + border-radius: 4px; } .node-type-badge { - padding: 0.4rem 0.8rem; - border-radius: 4px; - font-size: 0.8rem; + padding: 0.2rem 0.5rem; + border-radius: 3px; + font-size: 0.65rem; font-weight: 600; text-transform: uppercase; - letter-spacing: 0.5px; } .node-type-domain { background: #00ff41; color: #000; } .node-type-ip { background: #ff9900; color: #000; } -.node-type-asn { background: #00aaff; color: #000; } +.node-type-isp { background: #00aaff; color: #000; } .node-type-large_entity { background: #ff6b6b; color: #fff; } .node-type-correlation_object { background: #9620c0; color: #fff; } .node-id-display { - font-family: 'Roboto Mono', monospace; - font-size: 1.1rem; + font-size: 0.85rem; color: #e0e0e0; word-break: break-all; } -/* Quick 
Stats Bar */ .quick-stats { display: flex; - gap: 0.75rem; /* Reduced from 1rem */ - padding: 0.5rem; /* Reduced from 0.75rem */ + gap: 0.5rem; + padding: 0.4rem; background: rgba(42, 42, 42, 0.5); - border-radius: 6px; - border: 1px solid #333; + border-radius: 4px; } .stat-item { display: flex; align-items: center; - gap: 0.5rem; + gap: 0.3rem; flex: 1; text-align: center; } .stat-icon { - font-size: 1.2rem; + font-size: 1rem; color: #00ff41; } .stat-value { - font-size: 1.1rem; + font-size: 0.9rem; font-weight: 600; color: #fff; } .stat-label { - font-size: 0.8rem; + font-size: 0.65rem; color: #999; } -/* Section Cards */ -.section-card { - background: linear-gradient(135deg, #2a2a2a 0%, #1f1f1f 100%); - border: 1px solid #333; - border-radius: 8px; - overflow: hidden; - transition: all 0.3s ease; - animation: slideInUp 0.3s ease-out; -} - -@keyframes slideInUp { - from { - opacity: 0; - transform: translateY(10px); - } - to { - opacity: 1; - transform: translateY(0); - } -} - -.section-card:hover { - border-color: #444; - box-shadow: 0 4px 12px rgba(0,0,0,0.3); -} - -.section-card.collapsed .section-header .toggle-icon { - transform: rotate(-90deg); -} - -.section-card.collapsed, -.section-card.collapsed .metadata-content { - display: none; -} - -.section-header { - display: flex; - justify-content: space-between; - align-items: center; - padding: 0.75rem 1rem; /* Reduced from 1rem 1.5rem */ - background: rgba(0, 255, 65, 0.05); - border-bottom: 1px solid #333; - cursor: pointer; /* Make clickable for collapse */ -} - -.section-header h4 { - margin: 0; - font-size: 1rem; - color: #00ff41; - display: flex; - align-items: center; - gap: 0.5rem; -} - -.section-icon { - font-size: 1.1rem; -} - -.count-badge, .merge-badge, .entity-badge { - background: #444; - color: #fff; - padding: 0.25rem 0.5rem; - border-radius: 3px; - font-size: 0.8rem; - font-weight: 600; -} - -.merge-badge { background: #9620c0; } -.entity-badge { background: #ff6b6b; } - -.toggle-section-btn { - background: none; - border: none; - color: #999; - cursor: pointer; - padding: 0.25rem; - transition: color 0.3s ease; -} - -.toggle-section-btn:hover { - color: #00ff41; -} - -.toggle-icon { - transition: transform 0.3s ease; -} - -/* === RELATIONSHIP SECTIONS === */ -.relationship-list { - padding: 1rem 1.5rem; - display: flex; - flex-direction: column; - gap: 0.75rem; -} - -.relationship-item { - display: flex; - justify-content: space-between; - align-items: center; - padding: 0.75rem; - background: rgba(255, 255, 255, 0.02); - border-radius: 4px; - border-left: 3px solid #444; - transition: all 0.3s ease; - animation: fadeIn 0.3s ease-out; -} - -.relationship-compact { - display: flex; - flex-direction: column; - gap: 0.5rem; - max-height: 200px; - overflow-y: auto; -} - -.relationship-compact-item { - display: flex; - justify-content: space-between; - align-items: center; - padding: 0.5rem; - background: rgba(255, 255, 255, 0.02); - border-radius: 4px; - font-size: 0.85rem; -} - -.relationship-compact-item:hover { - background: rgba(255, 255, 255, 0.05); -} - -@keyframes fadeIn { - from { opacity: 0; } - to { opacity: 1; } -} - -.relationship-item:hover { - background: rgba(255, 255, 255, 0.05); - border-left-color: #00ff41; -} - -.relationship-source, .relationship-target { - font-family: 'Roboto Mono', monospace; - color: #e0e0e0; - cursor: pointer; - flex: 1; -} - -.node-link-compact { - color: #00aaff; - text-decoration: none; - cursor: pointer; - font-family: 'Roboto Mono', monospace; -} - 
-.node-link-compact:hover { - color: #00ff41; - text-decoration: underline; -} - -.node-link::before { - content: ''; - position: absolute; - top: 50%; - left: 50%; - width: 0; - height: 0; - background: rgba(0, 255, 65, 0.1); - border-radius: 50%; - transform: translate(-50%, -50%); - transition: width 0.6s, height 0.6s; -} - -.node-link:hover::before { - width: 300px; - height: 300px; -} - -.relationship-type { - display: flex; - flex-direction: column; - align-items: flex-end; - gap: 0.25rem; -} - -.relation-label { - font-size: 0.8rem; - color: #999; - background: #333; - padding: 0.2rem 0.5rem; - border-radius: 3px; -} - -.confidence-compact { - font-size: 0.7rem; - padding: 0.1rem 0.3rem; - border-radius: 2px; - font-weight: bold; -} - -.confidence-indicator { - font-size: 0.7rem; - letter-spacing: 1px; -} - -.confidence-compact.high { background: #00ff41; color: #000; } -.confidence-compact.medium { background: #ff9900; color: #000; } -.confidence-compact.low { background: #ff6b6b; color: #fff; } - - -/* === ATTRIBUTES SECTION === */ +/* Attribute Lists */ .attribute-list { display: flex; flex-direction: column; - gap: 0.5rem; - max-height: 300px; + gap: 0.3rem; + max-height: 250px; overflow-y: auto; } .attribute-item-compact { display: grid; grid-template-columns: 1fr 2fr; - gap: 0.5rem; - padding: 0.5rem; + gap: 0.4rem; + padding: 0.3rem; background: rgba(255, 255, 255, 0.02); - border-radius: 4px; - font-size: 0.85rem; + border-radius: 3px; + font-size: 0.75rem; } .attribute-key-compact { @@ -1285,573 +920,224 @@ input[type="text"]:focus, select:focus { .attribute-value-compact { color: #e0e0e0; - font-family: 'Roboto Mono', monospace; word-break: break-word; } - - -.category-header { - font-size: 0.9rem; - color: #ff9900; - font-weight: 600; - margin-bottom: 0.75rem; - padding-bottom: 0.25rem; - border-bottom: 1px solid #333; -} - - - -.attr-key { - font-size: 0.8rem; - color: #999; - margin-bottom: 0.25rem; - font-weight: 500; -} - -.attr-value { - color: #e0e0e0; - font-family: 'Roboto Mono', monospace; - font-size: 0.9rem; - word-break: break-word; -} - -.array-value { - border: 1px solid #333; - border-radius: 4px; - overflow: hidden; -} - -.array-summary { - background: #333; - padding: 0.5rem; - font-size: 0.8rem; - color: #999; - cursor: pointer; - transition: background-color 0.3s ease; -} - -.array-summary:hover { - background: #3a3a3a; -} - -.array-items { - padding: 0.5rem; - max-height: 120px; +/* Relationship Lists */ +.relationship-compact { + display: flex; + flex-direction: column; + gap: 0.3rem; + max-height: 180px; overflow-y: auto; } -.array-value.expanded .array-items { - max-height: none; -} - -.array-item { - padding: 0.25rem 0; - border-bottom: 1px solid #333; - font-size: 0.8rem; -} - -.array-item:last-child { - border-bottom: none; -} - -.array-more { - padding: 0.25rem 0; - font-style: italic; - color: #666; - font-size: 0.8rem; -} - -.array-display { - font-family: 'Roboto Mono', monospace; - font-size: 0.8rem; -} - -.array-display-item { - padding: 0.25rem 0; - border-bottom: 1px solid #333; -} - -.array-display-item:last-child { - border-bottom: none; -} - -.object-value .compact-object { - display: flex; - flex-direction: column; - gap: 0.25rem; -} - -.object-display { - font-family: 'Roboto Mono', monospace; - font-size: 0.8rem; - background: rgba(0, 0, 0, 0.2); - padding: 0.5rem; - border-radius: 4px; - border: 1px solid #333; -} - -.compact-pair { - display: flex; - gap: 0.5rem; -} - -.compact-key { - color: #999; - font-size: 0.8rem; -} - 
-.compact-value { - color: #e0e0e0; - font-size: 0.8rem; -} - -.compact-more { - font-style: italic; - color: #666; - font-size: 0.8rem; -} - -.certificate-list { - display: flex; - flex-direction: column; - gap: 0.5rem; - max-height: 250px; - overflow-y: auto; -} - -.certificate-item { - background: rgba(255, 255, 255, 0.02); - border: 1px solid #333; - border-radius: 4px; - padding: 0.5rem; -} - -.certificate-summary { +.relationship-compact-item { display: flex; justify-content: space-between; align-items: center; - font-size: 0.85rem; + padding: 0.3rem; + background: rgba(255, 255, 255, 0.02); + border-radius: 3px; + font-size: 0.75rem; } -.certificate-issuer { - color: #00ff41; - font-weight: 500; -} - -.certificate-status { - font-size: 0.7rem; - padding: 0.1rem 0.3rem; - border-radius: 2px; - font-weight: bold; -} - -.certificate-status.valid { background: #00ff41; color: #000; } -.certificate-status.invalid { background: #ff6b6b; color: #fff; } -.certificate-status.expired { background: #ff9900; color: #000; } - -.cert-summary-grid { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(100px, 1fr)); +.relationship-list { + display: flex; + flex-direction: column; gap: 0.5rem; - margin-bottom: 1rem; + padding: 0.75rem; } -.cert-stat-item { - text-align: center; +.relationship-item { + display: flex; + justify-content: space-between; + align-items: center; padding: 0.5rem; background: rgba(255, 255, 255, 0.02); - border: 1px solid #333; - border-radius: 4px; + border-radius: 3px; + border-left: 2px solid #444; + font-size: 0.75rem; } -.cert-stat-value { - font-size: 1.2rem; - font-weight: 600; - color: #00ff41; - font-family: 'Roboto Mono', monospace; +.relationship-item:hover { + background: rgba(255, 255, 255, 0.05); + border-left-color: #00ff41; } -.cert-stat-label { - font-size: 0.8rem; +.relationship-source, .relationship-target { + color: #e0e0e0; + cursor: pointer; + flex: 1; +} + +.relationship-type { + display: flex; + flex-direction: column; + align-items: flex-end; + gap: 0.2rem; +} + +.relation-label { + font-size: 0.7rem; color: #999; - margin-top: 0.25rem; + background: #333; + padding: 0.15rem 0.3rem; + border-radius: 2px; } -/* Status badges - extends existing badge system */ -.cert-status.valid { background: #00ff41; color: #000; } -.cert-status.invalid { background: #ff6b6b; color: #fff; } -.cert-status.warning { background: #ff9900; color: #000; } +.confidence-indicator { + font-size: 0.6rem; + letter-spacing: 1px; +} -/* Certificate links */ -.cert-link { +.node-link-compact { color: #00aaff; text-decoration: none; - font-size: 0.85rem; + cursor: pointer; + font-size: 0.75rem; } -.cert-link:hover { +.node-link-compact:hover { color: #00ff41; text-decoration: underline; } -/* === CORRELATION OBJECT LAYOUT === */ -.correlation-grid { - padding: 1rem 1.5rem; - display: flex; - flex-direction: column; - gap: 0.75rem; -} - -.correlation-item { - background: rgba(255, 255, 255, 0.02); - border: 1px solid #333; - border-radius: 4px; - overflow: hidden; - transition: all 0.3s ease; -} - -.correlation-item.expanded { - border-color: #00ff41; -} - -.correlation-preview { - padding: 0.75rem; - font-family: 'Roboto Mono', monospace; - color: #e0e0e0; - display: flex; - justify-content: space-between; - align-items: center; -} - -.expand-btn { - background: none; - border: none; - color: #999; +.node-link { + color: #00aaff; + text-decoration: none; cursor: pointer; - padding: 0.5rem; - transition: color 0.3s ease; + font-size: 0.75rem; position: relative; - 
overflow: hidden; } -.expand-btn:hover { +.node-link:hover { color: #00ff41; + text-decoration: underline; } -.expand-btn::before { - content: ''; - position: absolute; - top: 50%; - left: 50%; - width: 0; - height: 0; - background: rgba(0, 255, 65, 0.1); - border-radius: 50%; - transform: translate(-50%, -50%); - transition: width 0.6s, height 0.6s; +.modal-title-icon { + margin-right: 0.4rem; + font-size: 1rem; } -.expand-btn:hover::before { - width: 300px; - height: 300px; +.modal-title-text { + font-family: 'Special Elite', monospace; } -.expand-icon { - transition: transform 0.3s ease; -} - -.correlation-item.expanded .expand-icon { - transform: rotate(180deg); -} - -.correlation-full { - padding: 0.75rem; - background: rgba(0, 0, 0, 0.3); - border-top: 1px solid #333; - font-family: 'Roboto Mono', monospace; - font-size: 0.8rem; - color: #ccc; - word-break: break-all; -} - -.correlation-full.hidden { - display: none; -} - -.correlation-item.expanded .correlation-full.hidden { - display: block; -} - -.correlation-attr-name { - color: #00ff41; +.merge-badge, .count-badge { + background: #444; + color: #fff; + padding: 0.15rem 0.3rem; + border-radius: 2px; + font-size: 0.65rem; font-weight: 600; - text-transform: capitalize; -} - -.correlation-hint { - color: #999; - cursor: help; - opacity: 0.7; -} - -.correlation-hint:hover { - opacity: 1; - color: #00ff41; -} - -.node-list { - padding: 1rem 1.5rem; - display: flex; - flex-direction: column; - gap: 0.5rem; -} - -.node-link-item { - display: flex; - align-items: center; - gap: 0.75rem; - padding: 0.5rem; - background: rgba(255, 255, 255, 0.02); - border-radius: 4px; - transition: all 0.3s ease; - cursor: pointer; - animation: fadeIn 0.3s ease-out; - position: relative; - overflow: hidden; -} - -.node-link-item:hover { - background: rgba(255, 255, 255, 0.05); - border-left: 3px solid #00ff41; -} - -.node-link-item::before { - content: ''; - position: absolute; - top: 50%; - left: 50%; - width: 0; - height: 0; - background: rgba(0, 255, 65, 0.1); - border-radius: 50%; - transform: translate(-50%, -50%); - transition: width 0.6s, height 0.6s; -} - -.node-link-item:hover::before { - width: 300px; - height: 300px; -} - -.node-icon { - color: #00ff41; - font-size: 0.8rem; -} - -.node-name { - flex: 1; - font-family: 'Roboto Mono', monospace; - color: #e0e0e0; -} - -.navigate-btn { - background: none; - border: 1px solid #444; - color: #999; - border-radius: 3px; - padding: 0.25rem 0.5rem; - cursor: pointer; - transition: all 0.3s ease; -} - -.navigate-btn:hover { - border-color: #00ff41; - color: #00ff41; -} - -/* === LARGE ENTITY LAYOUT === */ -.entity-stats { - padding: 1rem 1.5rem; -} - -.stat-row { - display: flex; - justify-content: space-between; - padding: 0.5rem 0; - border-bottom: 1px solid #333; -} - -.stat-row:last-child { - border-bottom: none; -} - -.entity-node-grid { - padding: 1rem 1.5rem; - display: grid; - grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); - gap: 1rem; -} - -.entity-node-card { - background: rgba(255, 255, 255, 0.02); - border: 1px solid #333; - border-radius: 4px; - overflow: hidden; - transition: all 0.3s ease; -} - -.entity-node-card.expanded { - border-color: #00ff41; -} - -.entity-node-header { - padding: 0.75rem; - background: rgba(0, 0, 0, 0.2); - cursor: pointer; - display: flex; - align-items: center; - gap: 0.5rem; - transition: all 0.3s ease; - position: relative; - overflow: hidden; -} - -.entity-node-header:hover { - background: rgba(0, 255, 65, 0.1); -} - -.entity-node-header::before 
{ - content: ''; - position: absolute; - top: 50%; - left: 50%; - width: 0; - height: 0; - background: rgba(0, 255, 65, 0.1); - border-radius: 50%; - transform: translate(-50%, -50%); - transition: width 0.6s, height 0.6s; -} - -.entity-node-header:hover::before { - width: 300px; - height: 300px; -} - -.expand-indicator { - margin-left: auto; - transition: transform 0.3s ease; -} - -.entity-node-card.expanded .expand-indicator { - transform: rotate(180deg); -} - -.entity-node-details { - display: none; - padding: 1rem; - border-top: 1px solid #333; -} - -.entity-node-card.expanded .entity-node-details { - display: block; -} - -/* === DESCRIPTION SECTION === */ -.description-content { - padding: 1rem 1.5rem; - color: #ccc; - line-height: 1.6; - font-style: italic; } /* Footer */ .footer { - background: linear-gradient(135deg, #0a0a0a 0%, #1a1a1a 100%); - border-top: 2px solid #333; - padding: 1.25rem 2rem; + background: #0a0a0a; + border-top: 1px solid #333; + padding: 0.75rem; text-align: center; - font-size: 0.85rem; + font-size: 0.7rem; color: #999; } .footer-content { - max-width: 1400px; + /*max-width: 1400px;*/ margin: 0 auto; display: flex; justify-content: center; align-items: center; - gap: 1rem; + gap: 0.75rem; flex-wrap: wrap; } .footer-separator { - margin: 0 1rem; color: #555; } -/* API Key Modal Styles */ -.apikey-section { - margin-bottom: 1.5rem; +/* Message System */ +.message-container { + position: fixed; + top: 15px; + right: 15px; + z-index: 1002; + max-width: 350px; } -.apikey-section label { - display: block; - margin-bottom: 0.5rem; - color: #c7c7c7; - font-size: 0.9rem; - font-weight: 500; -} - -.apikey-section input[type="password"] { - width: 100%; - padding: 0.75rem; - background-color: #1a1a1a; - border: 1px solid #555; - color: #c7c7c7; - font-family: 'Roboto Mono', monospace; - font-size: 0.9rem; - transition: border-color 0.3s ease, box-shadow 0.3s ease; -} - -.apikey-section input[type="password"]:focus { - outline: none; - border-color: #00ff41; - box-shadow: 0 0 5px rgba(0, 255, 65, 0.5); -} - -.apikey-help { - font-size: 0.8rem; - color: #666; - margin-top: 0.25rem; - font-style: italic; -} - -.api-key-set-message { - display: flex; - justify-content: space-between; - align-items: center; - padding: 0.75rem; - background: #2c5c34; - border: 1px solid #3a7a48; +.message-toast { + margin-bottom: 8px; border-radius: 4px; - color: #e0e0e0; + font-size: 0.8rem; + animation: slideInRight 0.3s ease-out; } -.api-key-set-text { - font-family: 'Roboto Mono', monospace; - font-size: 0.9rem; +.message-toast.success { + background: #2c5c34; + border-left: 3px solid #00ff41; +} + +.message-toast.error { + background: #5c2c2c; + border-left: 3px solid #ff6b6b; +} + +.message-toast.warning { + background: #5c4c2c; + border-left: 3px solid #ff9900; +} + +.message-toast.info { + background: #2c3e5c; + border-left: 3px solid #00aaff; +} + +/* Settings Modal Specific */ +.provider-toggle { + appearance: none !important; + width: 12px !important; + height: 12px !important; + border: 1px solid #555 !important; + background: #1a1a1a !important; + cursor: pointer !important; + border-radius: 2px !important; +} + +.provider-toggle:checked { + background: #00ff41 !important; + border-color: #00ff41 !important; +} + +.provider-toggle:checked::after { + content: '✓' !important; + position: absolute !important; + top: -2px !important; + left: 1px !important; + color: #1a1a1a !important; + font-size: 10px !important; + font-weight: bold !important; +} + +.api-key-status-row { + 
padding: 0.5rem; + border-radius: 3px; + border: 1px solid; + transition: all 0.3s ease; + margin-top: 0.4rem; + font-size: 0.75rem; } .clear-api-key-btn { + font-size: 0.7rem !important; + padding: 0.3rem 0.5rem !important; + min-width: auto !important; background: none; border: 1px solid #555; color: #c7c7c7; - padding: 0.25rem 0.5rem; - border-radius: 3px; + border-radius: 2px; cursor: pointer; - font-size: 0.8rem; transition: all 0.3s ease; } @@ -1860,174 +1146,105 @@ input[type="text"]:focus, select:focus { color: #ff6b6b; } -/* Message Toasts */ -.message-container { - position: fixed; - top: 20px; - right: 20px; - z-index: 1002; - max-width: 400px; - pointer-events: auto; +.clear-api-key-btn:disabled { + opacity: 0.5 !important; + cursor: not-allowed !important; } -.message-toast { - margin-bottom: 10px; - border-radius: 4px; +input[type="password"] { + width: 100%; + padding: 0.5rem; + background: #1a1a1a; + border: 1px solid #444; + border-radius: 3px; + color: #e0e0e0; font-family: 'Roboto Mono', monospace; - font-size: 0.9rem; - box-shadow: 0 4px 6px rgba(0,0,0,0.3); - animation: slideInRight 0.3s ease-out; - pointer-events: auto; + font-size: 0.75rem; + transition: border-color 0.3s ease; } -.message-toast.success { - background: #2c5c34; - border-left: 4px solid #00ff41; +input[type="password"]:focus { + outline: none; + border-color: #00ff41; + box-shadow: 0 0 5px rgba(0, 255, 65, 0.3); } -.message-toast.error { - background: #5c2c2c; - border-left: 4px solid #ff6b6b; +.apikey-help { + font-size: 0.65rem; + color: #666; + margin-top: 0.3rem; + font-style: italic; + line-height: 1.3; } -.message-toast.warning { - background: #5c4c2c; - border-left: 4px solid #ff9900; +/* Context Menu */ +.graph-context-menu { + position: fixed; + z-index: 1000; + background: #2a2a2a; + border: 1px solid #444; + border-radius: 4px; + display: none; + font-size: 0.8rem; + min-width: 160px; } -.message-toast.info { - background: #2c3e5c; - border-left: 4px solid #00aaff; +.graph-context-menu ul { + list-style: none; + padding: 0.3rem 0; + margin: 0; } -@keyframes slideInRight { - from { - transform: translateX(100%); - opacity: 0; - } - to { - transform: translateX(0); - opacity: 1; - } +.graph-context-menu ul li { + padding: 0.5rem 0.75rem; + cursor: pointer; + display: flex; + align-items: center; + gap: 0.4rem; } -@keyframes slideOutRight { - from { - transform: translateX(0); - opacity: 1; - } - to { - transform: translateX(100%); - opacity: 0; - } +.graph-context-menu ul li:hover { + background: #3a3a3a; + color: #00ff41; } -/* Large Entity Node Details */ -.large-entity-nodes-list { - margin-top: 1rem; +.menu-icon { + font-size: 0.8rem; + width: 1rem; + text-align: center; } -.large-entity-node-details { - margin-bottom: 0.5rem; - border: 1px solid #333; +/* Scrollbars */ +::-webkit-scrollbar { + width: 6px; +} + +::-webkit-scrollbar-track { + background: #1a1a1a; +} + +::-webkit-scrollbar-thumb { + background: #444; border-radius: 3px; } -.large-entity-node-details summary { - padding: 0.5rem; - background-color: #3a3a3a; - cursor: pointer; - outline: none; +::-webkit-scrollbar-thumb:hover { + background: #555; } -.large-entity-node-details summary:hover { - background-color: #4a4a4a; +/* Animations */ +@keyframes slideInRight { + from { transform: translateX(100%); opacity: 0; } + to { transform: translateX(0); opacity: 1; } } -.large-entity-node-details .detail-row { - margin-left: 1rem; - margin-right: 1rem; -} - -.large-entity-node-details .detail-section-header { - margin-left: 1rem; 
- margin-right: 1rem; -} - -/* Legacy Detail Styles for Compatibility */ -.detail-row { - display: flex; - justify-content: space-between; - align-items: center; - margin-bottom: 0.75rem; - padding-bottom: 0.25rem; - border-bottom: 1px solid #333; -} - -.detail-label { - color: #999; - font-weight: 500; -} - -.detail-value { - color: #c7c7c7; - word-break: break-word; -} - -.detail-section-header { - font-size: 1rem; - color: #00ff41; - font-weight: 500; - margin-top: 1.5rem; - margin-bottom: 0.75rem; - padding-bottom: 0.5rem; - border-bottom: 1px solid #444; -} - -.detail-section-header:first-of-type { - margin-top: 0; -} - -.copy-btn { - background: none; - border: none; - color: #666; - cursor: pointer; - font-size: 1rem; - margin-left: 10px; - transition: color 0.3s ease; -} - -.copy-btn:hover { - color: #00ff41; -} - -.status-icon { - margin-left: 5px; -} - -.error { - color: #ff6b6b !important; - border-color: #ff6b6b !important; -} - -.success { - color: #00ff41 !important; +@keyframes fadeIn { + from { opacity: 0; } + to { opacity: 1; } } .fade-in { - animation: enhancedFadeIn 0.5s ease-out; -} - -@keyframes enhancedFadeIn { - from { - opacity: 0; - transform: translateY(10px); - } - to { - opacity: 1; - transform: translateY(0); - } + animation: fadeIn 0.3s ease-out; } /* Utility Classes */ @@ -2035,7 +1252,26 @@ input[type="text"]:focus, select:focus { display: none !important; } -/* Graph specific styles */ +.success { + color: #00ff41 !important; +} + +.error { + color: #ff6b6b !important; + border-color: #ff6b6b !important; +} + +/* Status Value Animations */ +.status-value.status-running { + animation: pulse 1.5s infinite; +} + +@keyframes pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.7; } +} + +/* Graph Library Overrides */ .vis-network { background-color: #1a1a1a !important; } @@ -2045,73 +1281,46 @@ input[type="text"]:focus, select:focus { border: 1px solid #444 !important; color: #c7c7c7 !important; font-family: 'Roboto Mono', monospace !important; - font-size: 0.8rem !important; + font-size: 0.7rem !important; + border-radius: 3px !important; } -/* === RESPONSIVE DESIGN === */ +.node-info-popup { + position: fixed; + background: rgba(42, 42, 42, 0.95); + border: 1px solid #555; + padding: 0.75rem; + border-radius: 4px; + color: #c7c7c7; + font-family: 'Roboto Mono', monospace; + font-size: 0.7rem; + max-width: 280px; + z-index: 1001; + box-shadow: 0 4px 6px rgba(0,0,0,0.3); +} + +/* Responsive */ @media (max-width: 768px) { .main-content { - padding: 0.75rem; - gap: 0.75rem; - } - - .header { - padding: 1rem; - } - - .header-content { - flex-direction: column; - gap: 1rem; - text-align: center; - } - - .button-group { - flex-direction: column; - } - - .legend { - padding: 0.5rem 0.75rem; - gap: 0.75rem; - } - - .provider-list { - padding: 0.75rem; + grid-template-columns: 1fr; + grid-template-areas: + "control" + "status" + "visualization" + "providers"; + padding: 0.5rem; } .modal-content { width: 95%; - margin: 5% auto; - max-height: 90vh; + margin: 2% auto; } - .modal-details { - padding: 0.75rem; - gap: 0.5rem; + .graph-container { + height: 400px; } - .entity-node-grid { + .provider-list { grid-template-columns: 1fr; } - - .quick-stats { - flex-direction: column; - gap: 0.5rem; - } - - .relationship-item { - flex-direction: column; - align-items: flex-start; - gap: 0.5rem; - } - - .status-row { - flex-direction: column; - align-items: flex-start; - gap: 0.5rem; - } - - .footer-content { - flex-direction: column; - gap: 0.5rem; - } } \ No newline at end of 
file diff --git a/static/js/graph.js b/static/js/graph.js index f1c703f..079eb6b 100644 --- a/static/js/graph.js +++ b/static/js/graph.js @@ -1,6 +1,7 @@ /** * Graph visualization module for DNSRecon * Handles network graph rendering using vis.js with proper large entity node hiding + * UPDATED: Now compatible with a strictly flat, unified data model for attributes. */ const contextMenuCSS = ` .graph-context-menu { @@ -213,7 +214,6 @@ class GraphManager { }); document.body.appendChild(this.contextMenu); - console.log('Context menu created and added to body'); } /** @@ -290,7 +290,6 @@ class GraphManager { // FIXED: Right-click context menu this.container.addEventListener('contextmenu', (event) => { event.preventDefault(); - console.log('Right-click detected at:', event.offsetX, event.offsetY); // Get coordinates relative to the canvas const pointer = { @@ -299,7 +298,6 @@ class GraphManager { }; const nodeId = this.network.getNodeAt(pointer); - console.log('Node at pointer:', nodeId); if (nodeId) { // Pass the original client event for positioning @@ -340,19 +338,12 @@ class GraphManager { // Stabilization events with progress this.network.on('stabilizationProgress', (params) => { const progress = params.iterations / params.total; - this.updateStabilizationProgress(progress); }); this.network.on('stabilizationIterationsDone', () => { this.onStabilizationComplete(); }); - // Selection events - this.network.on('select', (params) => { - console.log('Selected nodes:', params.nodes); - console.log('Selected edges:', params.edges); - }); - // Click away to hide context menu document.addEventListener('click', (e) => { if (!this.contextMenu.contains(e.target)) { @@ -376,28 +367,62 @@ class GraphManager { this.initialize(); } + this.initialTargetIds = new Set(graphData.initial_targets || []); + // Check if we have actual data to display + const hasData = graphData.nodes.length > 0 || graphData.edges.length > 0; + + // Handle placeholder visibility + const placeholder = this.container.querySelector('.graph-placeholder'); + if (placeholder) { + if (hasData) { + placeholder.style.display = 'none'; + } else { + placeholder.style.display = 'flex'; + // Early return if no data to process + return; + } + } + this.largeEntityMembers.clear(); const largeEntityMap = new Map(); graphData.nodes.forEach(node => { - if (node.type === 'large_entity' && node.attributes && Array.isArray(node.attributes.nodes)) { - node.attributes.nodes.forEach(nodeId => { - largeEntityMap.set(nodeId, node.id); - this.largeEntityMembers.add(nodeId); - }); + if (node.type === 'large_entity' && node.attributes) { + const nodesAttribute = this.findAttributeByName(node.attributes, 'nodes'); + if (nodesAttribute && Array.isArray(nodesAttribute.value)) { + nodesAttribute.value.forEach(nodeId => { + largeEntityMap.set(nodeId, node.id); + this.largeEntityMembers.add(nodeId); + }); + } } }); const filteredNodes = graphData.nodes.filter(node => { - // Only include nodes that are NOT members of large entities, but always include the container itself return !this.largeEntityMembers.has(node.id) || node.type === 'large_entity'; }); console.log(`Filtered ${graphData.nodes.length - filteredNodes.length} large entity member nodes from visualization`); - // Process only the filtered nodes + // Process nodes with proper certificate coloring const processedNodes = filteredNodes.map(node => { - return this.processNode(node); + const processed = this.processNode(node); + + // Apply certificate-based coloring here in frontend + if (node.type === 'domain' 
&& Array.isArray(node.attributes)) { + const certInfo = this.analyzeCertificateInfo(node.attributes); + + if (certInfo.hasExpiredOnly) { + // Red for domains with only expired/invalid certificates + processed.color = { background: '#ff6b6b', border: '#cc5555' }; + } else if (!certInfo.hasCertificates) { + // Grey for domains with no certificates + processed.color = { background: '#c7c7c7', border: '#999999' }; + } + // Valid certificates use default green (handled by processNode) + } + + return processed; }); const mergedEdges = {}; @@ -434,24 +459,19 @@ class GraphManager { const existingNodeIds = this.nodes.getIds(); const existingEdgeIds = this.edges.getIds(); - // Add new nodes with fade-in animation const newNodes = processedNodes.filter(node => !existingNodeIds.includes(node.id)); const newEdges = processedEdges.filter(edge => !existingEdgeIds.includes(edge.id)); - // Update existing data this.nodes.update(processedNodes); this.edges.update(processedEdges); - // After data is loaded, apply filters this.updateFilterControls(); this.applyAllFilters(); - // Highlight new additions briefly if (newNodes.length > 0 || newEdges.length > 0) { setTimeout(() => this.highlightNewElements(newNodes, newEdges), 100); } - // Auto-fit view for small graphs or first update if (processedNodes.length <= 10 || existingNodeIds.length === 0) { setTimeout(() => this.fitView(), 800); } @@ -465,9 +485,62 @@ class GraphManager { } } + analyzeCertificateInfo(attributes) { + let hasCertificates = false; + let hasValidCertificates = false; + let hasExpiredCertificates = false; + + for (const attr of attributes) { + const attrName = (attr.name || '').toLowerCase(); + const attrProvider = (attr.provider || '').toLowerCase(); + const attrValue = attr.value; + + // Look for certificate attributes from crtsh provider + if (attrProvider === 'crtsh' || attrName.startsWith('cert_')) { + hasCertificates = true; + + // Check certificate validity using raw attribute names + if (attrName === 'cert_is_currently_valid') { + if (attrValue === true) { + hasValidCertificates = true; + } else if (attrValue === false) { + hasExpiredCertificates = true; + } + } + // Check for expiry indicators + else if (attrName === 'cert_expires_soon' && attrValue === true) { + hasExpiredCertificates = true; + } + else if (attrName.includes('expired') && attrValue === true) { + hasExpiredCertificates = true; + } + } + } + + return { + hasCertificates, + hasValidCertificates, + hasExpiredCertificates, + hasExpiredOnly: hasExpiredCertificates && !hasValidCertificates + }; + } + /** - * Process node data with styling and metadata - * @param {Object} node - Raw node data + * UPDATED: Helper method to find an attribute by name in the standardized attributes list + * @param {Array} attributes - List of StandardAttribute objects + * @param {string} name - Attribute name to find + * @returns {Object|null} The attribute object if found, null otherwise + */ + findAttributeByName(attributes, name) { + if (!Array.isArray(attributes)) { + return null; + } + return attributes.find(attr => attr.name === name) || null; + } + + /** + * UPDATED: Process node data with styling and metadata for the flat data model + * @param {Object} node - Raw node data with standardized attributes * @returns {Object} Processed node data */ processNode(node) { @@ -478,7 +551,7 @@ class GraphManager { size: this.getNodeSize(node.type), borderColor: this.getNodeBorderColor(node.type), shape: this.getNodeShape(node.type), - attributes: node.attributes || {}, + attributes: 
node.attributes || [], description: node.description || '', metadata: node.metadata || {}, type: node.type, @@ -490,27 +563,34 @@ class GraphManager { if (node.confidence) { processedNode.borderWidth = Math.max(2, Math.floor(node.confidence * 5)); } - - // Style based on certificate validity - if (node.type === 'domain') { - if (node.attributes && node.attributes.certificates && node.attributes.certificates.has_valid_cert === false) { - processedNode.color = { background: '#888888', border: '#666666' }; + + // FIXED: Certificate-based domain coloring + if (node.type === 'domain' && Array.isArray(node.attributes)) { + const certInfo = this.analyzeCertificateInfo(node.attributes); + + if (certInfo.hasExpiredOnly) { + // Red for domains with only expired/invalid certificates + processedNode.color = '#ff6b6b'; + processedNode.borderColor = '#cc5555'; + } else if (!certInfo.hasCertificates) { + // Grey for domains with no certificates + processedNode.color = '#c7c7c7'; + processedNode.borderColor = '#999999'; } + // Green for valid certificates (default color) } - // Handle merged correlation objects (similar to large entities) + // Handle merged correlation objects if (node.type === 'correlation_object') { const metadata = node.metadata || {}; const values = metadata.values || []; const mergeCount = metadata.merge_count || 1; if (mergeCount > 1) { - // Display as merged correlation container processedNode.label = `Correlations (${mergeCount})`; processedNode.title = `Merged correlation container with ${mergeCount} values: ${values.slice(0, 3).join(', ')}${values.length > 3 ? '...' : ''}`; - processedNode.borderWidth = 3; // Thicker border for merged nodes + processedNode.borderWidth = 3; } else { - // Single correlation value const value = Array.isArray(values) && values.length > 0 ? values[0] : (metadata.value || 'Unknown'); const displayValue = typeof value === 'string' && value.length > 20 ? value.substring(0, 17) + '...' 
: value; processedNode.label = `${displayValue}`; @@ -521,6 +601,7 @@ class GraphManager { return processedNode; } + /** * Process edge data with styling and metadata * @param {Object} edge - Raw edge data @@ -584,7 +665,8 @@ class GraphManager { const colors = { 'domain': '#00ff41', // Green 'ip': '#ff9900', // Amber - 'asn': '#00aaff', // Blue + 'isp': '#00aaff', // Blue + 'ca': '#ff6b6b', // Red 'large_entity': '#ff6b6b', // Red for large entities 'correlation_object': '#9620c0ff' }; @@ -600,7 +682,8 @@ class GraphManager { const borderColors = { 'domain': '#00aa2e', 'ip': '#cc7700', - 'asn': '#0088cc', + 'isp': '#0088cc', + 'ca': '#cc5555', 'correlation_object': '#c235c9ff' }; return borderColors[nodeType] || '#666666'; @@ -615,9 +698,10 @@ class GraphManager { const sizes = { 'domain': 12, 'ip': 14, - 'asn': 16, + 'isp': 16, + 'ca': 16, 'correlation_object': 8, - 'large_entity': 5 + 'large_entity': 25 }; return sizes[nodeType] || 12; } @@ -631,9 +715,10 @@ class GraphManager { const shapes = { 'domain': 'dot', 'ip': 'square', - 'asn': 'triangle', + 'isp': 'triangle', + 'ca': 'diamond', 'correlation_object': 'hexagon', - 'large_entity': 'database' + 'large_entity': 'dot' }; return shapes[nodeType] || 'dot'; } @@ -889,15 +974,6 @@ class GraphManager { }, 2000); } - /** - * Update stabilization progress - * @param {number} progress - Progress value (0-1) - */ - updateStabilizationProgress(progress) { - // Could show a progress indicator if needed - console.log(`Graph stabilization: ${(progress * 100).toFixed(1)}%`); - } - /** * Handle stabilization completion */ @@ -982,7 +1058,7 @@ class GraphManager { this.edges.clear(); this.history = []; this.largeEntityMembers.clear(); // Clear large entity tracking - this.clearInitialTargets(); + this.initialTargetIds.clear(); // Show placeholder const placeholder = this.container.querySelector('.graph-placeholder'); @@ -1085,11 +1161,11 @@ class GraphManager { adjacencyList ); - console.log(`Reachability analysis complete:`, { + /*console.log(`Reachability analysis complete:`, { reachable: analysis.reachableNodes.size, unreachable: analysis.unreachableNodes.size, clusters: analysis.isolatedClusters.length - }); + });*/ return analysis; } @@ -1157,16 +1233,6 @@ class GraphManager { }; } - addInitialTarget(targetId) { - this.initialTargetIds.add(targetId); - console.log("Initial targets:", this.initialTargetIds); - } - - clearInitialTargets() { - this.initialTargetIds.clear(); - console.log("Initial targets cleared."); - } - updateFilterControls() { if (!this.filterPanel) return; const nodeTypes = new Set(this.nodes.get().map(n => n.type)); @@ -1204,7 +1270,6 @@ class GraphManager { * Replaces the existing applyAllFilters() method */ applyAllFilters() { - console.log("Applying filters with enhanced reachability analysis..."); if (this.nodes.length === 0) return; // Get filter criteria from UI @@ -1260,23 +1325,11 @@ class GraphManager { operation: 'hide_with_reachability', timestamp: Date.now() }; - - // Apply hiding with forensic documentation - const updates = nodesToHide.map(id => ({ - id: id, - hidden: true, - forensicNote: `Hidden due to reachability analysis from ${nodeId}` - })); + const updates = nodesToHide.map(id => ({ id: id, hidden: true })); this.nodes.update(updates); this.addToHistory('hide', historyData); - console.log(`Forensic hide operation: ${nodesToHide.length} nodes hidden`, { - originalTarget: nodeId, - cascadeNodes: nodesToHide.length - 1, - isolatedClusters: analysis.isolatedClusters.length - }); - return { hiddenNodes: 
nodesToHide, isolatedClusters: analysis.isolatedClusters @@ -1359,9 +1412,7 @@ class GraphManager { // Handle operation results if (!operationFailed) { - this.addToHistory('delete', historyData); - console.log(`Forensic delete operation completed:`, historyData.forensicAnalysis); - + this.addToHistory('delete', historyData); return { success: true, deletedNodes: nodesToDelete, @@ -1452,7 +1503,6 @@ class GraphManager { e.stopPropagation(); const action = e.currentTarget.dataset.action; const nodeId = e.currentTarget.dataset.nodeId; - console.log('Context menu action:', action, 'for node:', nodeId); this.performContextMenuAction(action, nodeId); this.hideContextMenu(); }); @@ -1472,9 +1522,7 @@ class GraphManager { * UPDATED: Enhanced context menu actions using new methods * Updates the existing performContextMenuAction() method */ - performContextMenuAction(action, nodeId) { - console.log('Performing enhanced action:', action, 'on node:', nodeId); - + performContextMenuAction(action, nodeId) { switch (action) { case 'focus': this.focusOnNode(nodeId); diff --git a/static/js/main.js b/static/js/main.js index 7c7b920..ee56fe7 100644 --- a/static/js/main.js +++ b/static/js/main.js @@ -1,6 +1,7 @@ /** * Main application logic for DNSRecon web interface * Handles UI interactions, API communication, and data flow + * UPDATED: Now compatible with a strictly flat, unified data model for attributes. */ class DNSReconApp { @@ -33,6 +34,9 @@ class DNSReconApp { this.updateStatus(); this.loadProviders(); this.initializeEnhancedModals(); + this.addCheckboxStyling(); + + this.updateGraph(); console.log('DNSRecon application initialized successfully'); } catch (error) { @@ -41,7 +45,7 @@ class DNSReconApp { } }); } - + /** * Initialize DOM element references */ @@ -54,7 +58,10 @@ class DNSReconApp { startScan: document.getElementById('start-scan'), addToGraph: document.getElementById('add-to-graph'), stopScan: document.getElementById('stop-scan'), - exportResults: document.getElementById('export-results'), + exportOptions: document.getElementById('export-options'), + exportModal: document.getElementById('export-modal'), + exportModalClose: document.getElementById('export-modal-close'), + exportGraphJson: document.getElementById('export-graph-json'), configureSettings: document.getElementById('configure-settings'), // Status elements @@ -77,9 +84,7 @@ class DNSReconApp { // Settings Modal elements settingsModal: document.getElementById('settings-modal'), settingsModalClose: document.getElementById('settings-modal-close'), - apiKeyInputs: document.getElementById('api-key-inputs'), - saveApiKeys: document.getElementById('save-api-keys'), - resetApiKeys: document.getElementById('reset-api-keys'), + // Other elements sessionId: document.getElementById('session-id'), @@ -116,7 +121,6 @@ class DNSReconApp { max-width: 400px; `; document.body.appendChild(messageContainer); - console.log('Message container created'); } } @@ -131,7 +135,6 @@ class DNSReconApp { this.initializeModalFunctionality(); this.elements.startScan.addEventListener('click', (e) => { - console.log('Start scan button clicked'); e.preventDefault(); this.startScan(); }); @@ -142,23 +145,33 @@ class DNSReconApp { }); this.elements.stopScan.addEventListener('click', (e) => { - console.log('Stop scan button clicked'); e.preventDefault(); this.stopScan(); }); - this.elements.exportResults.addEventListener('click', (e) => { - console.log('Export results button clicked'); + this.elements.exportOptions.addEventListener('click', (e) => { 
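+            // Open the export options modal; the actual JSON download is handled by exportGraphJson()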
e.preventDefault(); - this.exportResults(); + this.showExportModal(); }); + if (this.elements.exportModalClose) { + this.elements.exportModalClose.addEventListener('click', () => this.hideExportModal()); + } + if (this.elements.exportModal) { + this.elements.exportModal.addEventListener('click', (e) => { + if (e.target === this.elements.exportModal) this.hideExportModal(); + }); + } + if (this.elements.exportGraphJson) { + this.elements.exportGraphJson.addEventListener('click', () => this.exportGraphJson()); + } + + this.elements.configureSettings.addEventListener('click', () => this.showSettingsModal()); // Enter key support for target domain input this.elements.targetInput.addEventListener('keypress', (e) => { if (e.key === 'Enter' && !this.isScanning) { - console.log('Enter key pressed in domain input'); this.startScan(); } }); @@ -183,10 +196,21 @@ class DNSReconApp { }); } if (this.elements.saveApiKeys) { - this.elements.saveApiKeys.addEventListener('click', () => this.saveApiKeys()); + this.elements.saveApiKeys.removeEventListener('click', this.saveApiKeys); } if (this.elements.resetApiKeys) { - this.elements.resetApiKeys.addEventListener('click', () => this.resetApiKeys()); + this.elements.resetApiKeys.removeEventListener('click', this.resetApiKeys); + } + + // Setup new handlers + const saveSettingsBtn = document.getElementById('save-settings'); + const resetSettingsBtn = document.getElementById('reset-settings'); + + if (saveSettingsBtn) { + saveSettingsBtn.addEventListener('click', () => this.saveSettings()); + } + if (resetSettingsBtn) { + resetSettingsBtn.addEventListener('click', () => this.resetSettings()); } // Listen for the custom event from the graph @@ -211,6 +235,7 @@ class DNSReconApp { if (e.key === 'Escape') { this.hideModal(); this.hideSettingsModal(); + this.hideExportModal(); // Add this line } }); @@ -283,12 +308,8 @@ class DNSReconApp { force_rescan_target: forceRescanTarget }; - console.log('Request data:', requestData); - const response = await this.apiCall('/api/scan/start', 'POST', requestData); - - console.log('API response received:', response); - + if (response.success) { this.currentSessionId = response.scan_id; this.showSuccess('Reconnaissance scan started successfully'); @@ -297,8 +318,6 @@ class DNSReconApp { this.graphManager.clear(); } - this.graphManager.addInitialTarget(target); - console.log(`Scan started for ${target} with depth ${maxDepth}`); // Start polling immediately with faster interval for responsiveness @@ -341,7 +360,6 @@ class DNSReconApp { if (response.success) { this.showSuccess('Scan stop requested'); - console.log('Scan stop requested successfully'); // Force immediate status update setTimeout(() => { @@ -375,26 +393,96 @@ class DNSReconApp { } /** - * Export scan results + * Show Export modal */ - async exportResults() { + showExportModal() { + if (this.elements.exportModal) { + this.elements.exportModal.style.display = 'block'; + } + } + + /** + * Hide Export modal + */ + hideExportModal() { + if (this.elements.exportModal) { + this.elements.exportModal.style.display = 'none'; + } + } + + /** + * Export graph data as JSON with proper error handling + */ + async exportGraphJson() { try { - console.log('Exporting results...'); + console.log('Exporting graph data as JSON...'); - // Create a temporary link to trigger download + // Show loading state + if (this.elements.exportGraphJson) { + const originalContent = this.elements.exportGraphJson.innerHTML; + this.elements.exportGraphJson.innerHTML = '[...]Exporting...'; + 
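+                // Disable the button while the export request is in flight; the finally block restores it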
this.elements.exportGraphJson.disabled = true; + + // Store original content for restoration + this.elements.exportGraphJson._originalContent = originalContent; + } + + // Make API call to get export data + const response = await fetch('/api/export', { + method: 'GET', + headers: { + 'Content-Type': 'application/json' + } + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error(errorData.error || `HTTP ${response.status}: ${response.statusText}`); + } + + // Check if response is JSON or file download + const contentType = response.headers.get('content-type'); + if (contentType && contentType.includes('application/json') && !response.headers.get('content-disposition')) { + // This is an error response in JSON format + const errorData = await response.json(); + throw new Error(errorData.error || 'Export failed'); + } + + // Get the filename from headers or create one + const contentDisposition = response.headers.get('content-disposition'); + let filename = 'dnsrecon_export.json'; + if (contentDisposition) { + const filenameMatch = contentDisposition.match(/filename[^;=\n]*=((['"]).*?\2|[^;\n]*)/); + if (filenameMatch) { + filename = filenameMatch[1].replace(/['"]/g, ''); + } + } + + // Create blob and download + const blob = await response.blob(); + const url = window.URL.createObjectURL(blob); const link = document.createElement('a'); - link.href = '/api/export'; - link.download = ''; // Let server determine filename + link.href = url; + link.download = filename; document.body.appendChild(link); link.click(); document.body.removeChild(link); + window.URL.revokeObjectURL(url); - this.showSuccess('Results export initiated'); - console.log('Results export initiated'); + this.showSuccess('Graph data exported successfully'); + this.hideExportModal(); } catch (error) { - console.error('Failed to export results:', error); - this.showError(`Failed to export results: ${error.message}`); + console.error('Failed to export graph data:', error); + this.showError(`Export failed: ${error.message}`); + } finally { + // Restore button state + if (this.elements.exportGraphJson) { + const originalContent = this.elements.exportGraphJson._originalContent || + '[JSON]Export Graph Data'; + this.elements.exportGraphJson.innerHTML = originalContent; + this.elements.exportGraphJson.disabled = false; + } } } @@ -410,7 +498,6 @@ class DNSReconApp { } this.pollInterval = setInterval(() => { - console.log('--- Polling tick ---'); this.updateStatus(); this.updateGraph(); this.loadProviders(); @@ -435,16 +522,11 @@ class DNSReconApp { */ async updateStatus() { try { - console.log('Updating status...'); const response = await this.apiCall('/api/scan/status'); - console.log('Status response:', response); if (response.success && response.status) { const status = response.status; - console.log('Current scan status:', status.status); - console.log('Current progress:', status.progress_percentage + '%'); - console.log('Graph stats:', status.graph_statistics); this.updateStatusDisplay(status); @@ -474,7 +556,6 @@ class DNSReconApp { console.log('Updating graph...'); const response = await this.apiCall('/api/graph'); - console.log('Graph response:', response); if (response.success) { const graphData = response.graph; @@ -483,21 +564,8 @@ class DNSReconApp { console.log('- Nodes:', graphData.nodes ? graphData.nodes.length : 0); console.log('- Edges:', graphData.edges ? 
graphData.edges.length : 0); - /*if (graphData.nodes) { - graphData.nodes.forEach(node => { - console.log(` Node: ${node.id} (${node.type})`); - }); - } - - if (graphData.edges) { - graphData.edges.forEach(edge => { - console.log(` Edge: ${edge.from} -> ${edge.to} (${edge.label})`); - }); - }*/ - - // Only update if data has changed - if (this.hasGraphChanged(graphData)) { - console.log('*** GRAPH DATA CHANGED - UPDATING VISUALIZATION ***'); + // FIXED: Always update graph, even if empty - let GraphManager handle placeholder + if (this.graphManager) { this.graphManager.updateGraph(graphData); this.lastGraphUpdate = Date.now(); @@ -506,27 +574,37 @@ class DNSReconApp { if (this.elements.relationshipsDisplay) { this.elements.relationshipsDisplay.textContent = edgeCount; } - } else { - console.log('Graph data unchanged, skipping update'); } } else { console.error('Graph update failed:', response); + // FIXED: Show placeholder when graph update fails + if (this.graphManager && this.graphManager.container) { + const placeholder = this.graphManager.container.querySelector('.graph-placeholder'); + if (placeholder) { + placeholder.style.display = 'flex'; + } + } } } catch (error) { console.error('Failed to update graph:', error); - // Don't show error for graph updates to avoid spam + // FIXED: Show placeholder on error + if (this.graphManager && this.graphManager.container) { + const placeholder = this.graphManager.container.querySelector('.graph-placeholder'); + if (placeholder) { + placeholder.style.display = 'flex'; + } + } } } + /** * Update status display elements * @param {Object} status - Status object from server */ updateStatusDisplay(status) { - try { - console.log('Updating status display...'); - + try { // Update status text with animation if (this.elements.scanStatus) { const formattedStatus = this.formatStatus(status.status); @@ -581,8 +659,6 @@ class DNSReconApp { } this.setUIState(status.status, status.task_queue_size); - - console.log('Status display updated successfully'); } catch (error) { console.error('Error updating status display:', error); } @@ -677,9 +753,7 @@ class DNSReconApp { /** * UI state management with immediate button updates */ - setUIState(state, task_queue_size) { - console.log(`Setting UI state to: ${state}`); - + setUIState(state, task_queue_size) { const isQueueEmpty = task_queue_size === 0; switch (state) { @@ -740,12 +814,11 @@ class DNSReconApp { */ async loadProviders() { try { - console.log('Loading providers...'); const response = await this.apiCall('/api/providers'); if (response.success) { this.updateProviderDisplay(response.providers); - this.buildApiKeyModal(response.providers); + this.buildSettingsModal(response.providers); // Updated to use new function console.log('Providers loaded successfully'); } @@ -753,6 +826,411 @@ class DNSReconApp { console.error('Failed to load providers:', error); } } + + /** + * Build the enhanced settings modal with provider configuration and API keys + * @param {Object} providers - Provider information from backend + */ + buildSettingsModal(providers) { + this.buildProviderConfigSection(providers); + this.buildApiKeySection(providers); + this.updateSettingsCounts(providers); + } + + /** + * Build the provider configuration section with enable/disable checkboxes + * @param {Object} providers - Provider information + */ + buildProviderConfigSection(providers) { + const providerConfigList = document.getElementById('provider-config-list'); + if (!providerConfigList) return; + + providerConfigList.innerHTML = ''; + + 
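+        // Each provider entry is assumed to carry at least display_name and enabled
+        // (the only fields read below); the object comes from the /api/providers response.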
for (const [name, info] of Object.entries(providers)) { + const providerConfig = document.createElement('div'); + providerConfig.className = 'provider-item'; + + const statusClass = info.enabled ? 'enabled' : 'disabled'; + const statusIcon = info.enabled ? '✓' : '✗'; + + providerConfig.innerHTML = ` +
+
${info.display_name}
+
+ ${statusIcon} ${info.enabled ? 'Enabled' : 'Disabled'} +
+
+
+
+ +
+
+ `; + + providerConfigList.appendChild(providerConfig); + } + + // Add checkbox styling and event handlers + this.setupProviderCheckboxes(); + } + + /** + * Setup provider checkbox styling and event handlers + */ + setupProviderCheckboxes() { + const checkboxes = document.querySelectorAll('.provider-toggle'); + + checkboxes.forEach(checkbox => { + // Apply existing checkbox styling + checkbox.style.cssText = ` + appearance: none; + width: 16px; + height: 16px; + border: 2px solid #555; + background: #1a1a1a; + cursor: pointer; + position: relative; + border-radius: 3px; + transition: all 0.3s ease; + `; + + // Update visual state + this.updateCheckboxAppearance(checkbox); + + // Add change event handler + checkbox.addEventListener('change', (e) => { + this.updateCheckboxAppearance(e.target); + }); + }); + } + + /** + * Add CSS for checkbox styling since we're using existing styles + */ + addCheckboxStyling() { + // Add CSS for the checkboxes to work with existing styles + const style = document.createElement('style'); + style.textContent = ` + .provider-toggle[data-checked="true"]::after { + content: '✓'; + position: absolute; + top: -2px; + left: 2px; + color: #1a1a1a; + font-size: 12px; + font-weight: bold; + } + + .provider-toggle:hover { + border-color: #00ff41; + } + + .api-key-status-row { + transition: all 0.3s ease; + } + + .provider-item { + margin-bottom: 1rem; + } + + .provider-item:last-child { + margin-bottom: 0; + } + `; + document.head.appendChild(style); + } + + + /** + * Update checkbox appearance based on checked state + */ + updateCheckboxAppearance(checkbox) { + if (checkbox.checked) { + checkbox.style.background = '#00ff41'; + checkbox.style.borderColor = '#00ff41'; + checkbox.style.setProperty('content', '"✓"', 'important'); + + // Add checkmark via pseudo-element simulation + checkbox.setAttribute('data-checked', 'true'); + } else { + checkbox.style.background = '#1a1a1a'; + checkbox.style.borderColor = '#555'; + checkbox.removeAttribute('data-checked'); + } + } + + /** + * Enhanced API key section builder - FIXED to always allow API key input + * @param {Object} providers - Provider information + */ + buildApiKeySection(providers) { + const apiKeyInputs = document.getElementById('api-key-inputs'); + if (!apiKeyInputs) return; + + apiKeyInputs.innerHTML = ''; + let hasApiKeyProviders = false; + + for (const [name, info] of Object.entries(providers)) { + if (info.requires_api_key) { + hasApiKeyProviders = true; + + const inputGroup = document.createElement('div'); + inputGroup.className = 'provider-item'; + + // Check if API key is set via backend (not clearable) or frontend (clearable) + const isBackendConfigured = info.api_key_source === 'backend'; + + if (info.api_key_configured && isBackendConfigured) { + // API key is configured via backend - show status only + inputGroup.innerHTML = ` +
+
${info.display_name}
+
✓ Backend Configured
+
+
+
+
+
API Key Active
+
+ Configured via environment variable +
+
+
+
+ `; + } else if (info.api_key_configured && !isBackendConfigured) { + // API key is configured via frontend - show status with clear option + inputGroup.innerHTML = ` +
+
${info.display_name}
+
✓ Web Configured
+
+
+
+
+
API Key Active
+
+ Set via web interface (session-only) +
+
+ +
+
+ `; + } else { + // API key not configured - ALWAYS show input field + const statusClass = info.enabled ? 'enabled' : 'api-key-required'; + const statusText = info.enabled ? '○ Ready for API Key' : '⚠️ API Key Required'; + + inputGroup.innerHTML = ` +
+
${info.display_name}
+
+ ${statusText} +
+
+
+ + +
+ ${info.api_key_help || `Provides enhanced ${info.display_name.toLowerCase()} data and context.`} + ${!info.enabled ? ' Enable the provider above to use this API key.' : ''} +
+
+ `; + } + + apiKeyInputs.appendChild(inputGroup); + } + } + + if (!hasApiKeyProviders) { + apiKeyInputs.innerHTML = ` +
+
No providers require API keys
+
All Active
+
+ `; + } + + // Setup clear button event handlers + this.setupApiKeyClearHandlers(); + } + + /** + * Setup API key clear button handlers + */ + setupApiKeyClearHandlers() { + document.querySelectorAll('.clear-api-key-btn').forEach(button => { + button.addEventListener('click', (e) => { + e.preventDefault(); + const provider = e.currentTarget.dataset.provider; + this.clearSingleApiKey(provider, e.currentTarget); + }); + }); + } + + /** + * Clear a single API key with immediate feedback + */ + async clearSingleApiKey(provider, buttonElement) { + try { + // Show immediate feedback + const originalContent = buttonElement.innerHTML; + buttonElement.innerHTML = '[...]Clearing...'; + buttonElement.disabled = true; + + const response = await this.apiCall('/api/config/api-keys', 'POST', { [provider]: '' }); + + if (response.success) { + // Find the parent container and update it + const providerContainer = buttonElement.closest('.provider-item'); + const statusRow = providerContainer.querySelector('.api-key-status-row'); + + // Animate out the current status + statusRow.style.transition = 'all 0.3s ease'; + statusRow.style.opacity = '0'; + statusRow.style.transform = 'translateX(-10px)'; + + setTimeout(() => { + // Replace with input field + const providerName = buttonElement.dataset.provider; + const apiKeySection = this.elements.apiKeyInputs; + + // Rebuild the API key section to reflect changes + this.loadProviders(); + + this.showSuccess(`API key for ${provider} has been cleared.`); + }, 300); + + } else { + throw new Error(response.error || 'Failed to clear API key'); + } + } catch (error) { + // Restore button on error + buttonElement.innerHTML = originalContent; + buttonElement.disabled = false; + this.showError(`Error clearing API key: ${error.message}`); + } + } + + /** + * Update settings modal counts + */ + updateSettingsCounts(providers) { + const providerCount = Object.keys(providers).length; + const apiKeyCount = Object.values(providers).filter(p => p.requires_api_key).length; + + const providerCountElement = document.getElementById('provider-count'); + const apiKeyCountElement = document.getElementById('api-key-count'); + + if (providerCountElement) providerCountElement.textContent = providerCount; + if (apiKeyCountElement) apiKeyCountElement.textContent = apiKeyCount; + } + + /** + * Enhanced save settings function + */ + async saveSettings() { + try { + const settings = { + apiKeys: {}, + providerSettings: {} + }; + + // Collect API key inputs + const apiKeyInputs = document.querySelectorAll('#api-key-inputs input[type="password"]'); + apiKeyInputs.forEach(input => { + const provider = input.dataset.provider; + const value = input.value.trim(); + if (provider && value) { + settings.apiKeys[provider] = value; + } + }); + + // Collect provider enable/disable settings + const providerCheckboxes = document.querySelectorAll('.provider-toggle'); + providerCheckboxes.forEach(checkbox => { + const provider = checkbox.dataset.provider; + if (provider) { + settings.providerSettings[provider] = { + enabled: checkbox.checked + }; + } + }); + + // Save API keys if any + if (Object.keys(settings.apiKeys).length > 0) { + const apiKeyResponse = await this.apiCall('/api/config/api-keys', 'POST', settings.apiKeys); + if (!apiKeyResponse.success) { + throw new Error(apiKeyResponse.error || 'Failed to save API keys'); + } + } + + // Save provider settings if any + if (Object.keys(settings.providerSettings).length > 0) { + const providerResponse = await this.apiCall('/api/config/providers', 'POST', 
settings.providerSettings); + if (!providerResponse.success) { + throw new Error(providerResponse.error || 'Failed to save provider settings'); + } + } + + this.showSuccess('Settings saved successfully'); + this.hideSettingsModal(); + + // Reload providers to reflect changes + this.loadProviders(); + + } catch (error) { + this.showError(`Error saving settings: ${error.message}`); + } + } + + /** + * Reset settings to defaults + */ + async resetSettings() { + try { + // Clear all API key inputs + const apiKeyInputs = document.querySelectorAll('#api-key-inputs input[type="password"]'); + apiKeyInputs.forEach(input => { + input.value = ''; + }); + + // Reset all provider checkboxes to enabled (default) + const providerCheckboxes = document.querySelectorAll('.provider-toggle'); + providerCheckboxes.forEach(checkbox => { + checkbox.checked = true; + this.updateCheckboxAppearance(checkbox); + }); + + // Reset recursion depth to default + const depthSelect = document.getElementById('max-depth'); + if (depthSelect) { + depthSelect.value = '2'; + } + + this.showInfo('Settings reset to defaults'); + + } catch (error) { + this.showError(`Error resetting settings: ${error.message}`); + } + } /** * Update provider display @@ -808,10 +1286,9 @@ class DNSReconApp { } /** - * Enhanced node details HTML generation with better visual hierarchy - * File: static/js/main.js (replace generateNodeDetailsHtml method) + * UPDATED: Enhanced node details HTML generation for unified data model + * Now properly groups attributes by provider/type with organized sections */ - generateNodeDetailsHtml(node) { if (!node) return '
Details not available.
'; @@ -844,7 +1321,7 @@ class DNSReconApp { `; - // Handle different node types with collapsible sections + // Handle different node types if (node.type === 'correlation_object') { detailsHtml += this.generateCorrelationDetails(node); } else if (node.type === 'large_entity') { @@ -857,25 +1334,18 @@ class DNSReconApp { return detailsHtml; } + /** + * UPDATED: Generate details for standard nodes with organized attribute grouping + */ generateStandardNodeDetails(node) { let html = ''; // Relationships sections html += this.generateRelationshipsSection(node); - // Enhanced attributes section with special certificate handling - if (node.attributes && Object.keys(node.attributes).length > 0) { - const { certificates, ...otherAttributes } = node.attributes; - - // Handle certificates separately with enhanced display - if (certificates) { - html += this.generateCertificateSection({ certificates }); - } - - // Handle other attributes normally - if (Object.keys(otherAttributes).length > 0) { - html += this.generateAttributesSection(otherAttributes); - } + // UPDATED: Enhanced attributes section with intelligent grouping (no formatting) + if (node.attributes && Array.isArray(node.attributes) && node.attributes.length > 0) { + html += this.generateOrganizedAttributesSection(node.attributes, node.type); } // Description section @@ -887,313 +1357,234 @@ class DNSReconApp { return html; } - /** - * Enhanced certificate section generation using existing styles - */ - generateCertificateSection(attributes) { - const certificates = attributes.certificates; - if (!certificates || typeof certificates !== 'object') { + generateOrganizedAttributesSection(attributes, nodeType) { + if (!Array.isArray(attributes) || attributes.length === 0) { return ''; } - - let html = ` - '; - return html; - } - /** - * Generate latest certificate info using existing attribute list - */ - generateLatestCertificateInfo(latest) { - const isValid = latest.is_currently_valid; - const statusText = isValid ? 'Valid' : 'Invalid/Expired'; - const statusColor = isValid ? '#00ff41' : '#ff6b6b'; + const groups = this.groupAttributesByProviderAndType(attributes, nodeType); + let html = ''; - let html = ` -
-
Most Recent Certificate
-
-
- Status: - ${statusText} -
-
- Issued: - ${latest.not_before || 'Unknown'} -
-
- Expires: - ${latest.not_after || 'Unknown'} -
-
- Issuer: - ${this.escapeHtml(latest.issuer_name || 'Unknown')} -
- ${latest.certificate_id ? ` -
- Certificate: - - - View on crt.sh ↗ - - -
- ` : ''} -
-
- `; - - return html; - } - - /** - * Generate certificate list using existing collapsible structure - */ - generateCertificateList(certificateDetails) { - if (!certificateDetails || certificateDetails.length === 0) { - return ''; - } - - // Limit display to prevent overwhelming the UI - const maxDisplay = 8; - const certificates = certificateDetails.slice(0, maxDisplay); - const remaining = certificateDetails.length - maxDisplay; - - let html = ` -
- 📋 Certificate Details (${certificates.length}${remaining > 0 ? ` of ${certificateDetails.length}` : ''}) -
- `; - - certificates.forEach((cert, index) => { - const isValid = cert.is_currently_valid; - let statusText = isValid ? '✅ Valid' : '❌ Invalid/Expired'; - let statusColor = isValid ? '#00ff41' : '#ff6b6b'; - - if (cert.expires_soon && isValid) { - statusText = '⚠️ Valid (Expiring Soon)'; - statusColor = '#ff9900'; - } - - html += ` -
-
- #${index + 1} - ${statusText} - ${cert.certificate_id ? ` - crt.sh ↗ - ` : ''} -
-
-
- Common Name: - ${this.escapeHtml(cert.common_name || 'N/A')} -
-
- Issuer: - ${this.escapeHtml(cert.issuer_name || 'Unknown')} -
-
- Valid From: - ${cert.not_before || 'Unknown'} -
-
- Valid Until: - ${cert.not_after || 'Unknown'} -
- ${cert.validity_period_days ? ` -
- Period: - ${cert.validity_period_days} days -
- ` : ''} -
-
- `; + const sortedGroups = Object.entries(groups).sort((a, b) => { + const priorityOrder = { 'high': 0, 'medium': 1, 'low': 2 }; + return priorityOrder[a[1].priority] - priorityOrder[b[1].priority]; }); - if (remaining > 0) { + for (const [groupName, groupData] of sortedGroups) { + if (groupData.attributes.length === 0) continue; + + const isOpen = groupData.priority === 'high'; + html += ` -
- 📋 ${remaining} additional certificate${remaining > 1 ? 's' : ''} not shown.
- Use the export function to see all certificates. -
+ '; + } + + return html; + } + + formatAttributeValue(attr) { + const value = attr.value; + const name = attr.name || ''; + + if (value === null || value === undefined) { + return 'N/A'; } - html += '
'; - return html; + if (Array.isArray(value)) { + if (value.length === 0) { + return 'Empty Array'; + } + + // ENHANCED: Special handling for specific DNS record types + if (name.endsWith('_records') || name.includes('record')) { + const recordType = name.replace('_records', '').toUpperCase(); + + // Format nicely for DNS records + if (value.length <= 5) { + const formattedRecords = value.map(record => { + // Add record type prefix if not already present + if (recordType !== 'DNS' && !record.includes(':')) { + return `${recordType}: ${record}`; + } + return record; + }); + return this.escapeHtml(formattedRecords.join('\n')); + } else { + const preview = value.slice(0, 3).map(record => { + if (recordType !== 'DNS' && !record.includes(':')) { + return `${recordType}: ${record}`; + } + return record; + }).join('\n'); + return this.escapeHtml(`${preview}\n... (+${value.length - 3} more ${recordType} records)`); + } + } + + // For other arrays (existing logic) + if (value.length <= 3) { + return this.escapeHtml(value.join(', ')); + } else { + const preview = value.slice(0, 2).join(', '); + return this.escapeHtml(`${preview} ... (${value.length} total)`); + } + } + + if (typeof value === 'object') { + return 'Object'; + } + + return this.escapeHtml(String(value)); + } + + groupAttributesByProviderAndType(attributes, nodeType) { + if (!Array.isArray(attributes) || attributes.length === 0) { + return {}; + } + + const groups = { + 'DNS Records': { icon: '📋', priority: 'high', attributes: [] }, + 'Certificate Information': { icon: '🔒', priority: 'high', attributes: [] }, + 'Network Information': { icon: '🌐', priority: 'high', attributes: [] }, + 'Provider Data': { icon: '📊', priority: 'medium', attributes: [] }, + 'Technical Details': { icon: '⚙️', priority: 'low', attributes: [] } + }; + + for (const attr of attributes) { + const provider = (attr.provider || '').toLowerCase(); + const name = (attr.name || '').toLowerCase(); + const type = (attr.type || '').toLowerCase(); + + let assigned = false; + + // ENHANCED: Better DNS record detection for specific record types + if (provider === 'dns' || + name.endsWith('_records') || // Catches a_records, mx_records, txt_records, etc. 
+ name.includes('record') || + ['ptr', 'mx', 'cname', 'ns', 'txt', 'soa', 'srv', 'caa', 'a_records', 'aaaa_records'].some(keyword => name.includes(keyword))) { + groups['DNS Records'].attributes.push(attr); + assigned = true; + } + // Certificate-related attributes + else if (provider === 'crtsh' || name.startsWith('cert_') || + ['certificate', 'ssl', 'tls', 'issuer', 'validity', 'san'].some(keyword => name.includes(keyword))) { + groups['Certificate Information'].attributes.push(attr); + assigned = true; + } + // Network/Shodan attributes + else if (provider === 'shodan' || + ['port', 'service', 'banner', 'asn', 'organization', 'country', 'city', 'network'].some(keyword => name.includes(keyword))) { + groups['Network Information'].attributes.push(attr); + assigned = true; + } + // Provider-specific data + else if (provider && ['shodan_', 'crtsh_', 'dns_'].some(prefix => name.startsWith(prefix))) { + groups['Provider Data'].attributes.push(attr); + assigned = true; + } + + // If not assigned to any specific group, put in technical details + if (!assigned) { + groups['Technical Details'].attributes.push(attr); + } + } + + // Remove empty groups + Object.keys(groups).forEach(groupName => { + if (groups[groupName].attributes.length === 0) { + delete groups[groupName]; + } + }); + + return groups; + } + + formatEdgeLabel(relationshipType, confidence) { + if (!relationshipType) return ''; + + const confidenceText = confidence >= 0.8 ? '●' : confidence >= 0.6 ? '◐' : '○'; + return `${relationshipType} ${confidenceText}`; + } + + createEdgeTooltip(edge) { + let tooltip = `
`; + tooltip += `
${edge.label || 'Relationship'}
`; + tooltip += `
Confidence: ${(edge.confidence_score * 100).toFixed(1)}%
`; + + // UPDATED: Use raw provider name (no formatting) + if (edge.source_provider) { + tooltip += `
Provider: ${edge.source_provider}
`; + } + + if (edge.discovery_timestamp) { + const date = new Date(edge.discovery_timestamp); + tooltip += `
Discovered: ${date.toLocaleString()}
`; + } + + tooltip += `
`; + return tooltip; } /** - * Generate certificate summary using minimal new CSS + * UPDATED: Enhanced correlation details showing the correlated attribute clearly (no formatting) */ - generateCertificateSummary(certificates) { - const total = certificates.total_certificates || 0; - const valid = certificates.valid_certificates || 0; - const expired = certificates.expired_certificates || 0; - const expiringSoon = certificates.expires_soon_count || 0; - const issuers = certificates.unique_issuers || []; + generateCorrelationDetails(node) { + const metadata = node.metadata || {}; + const value = metadata.value; + const correlatedNodes = metadata.correlated_nodes || []; + const sources = metadata.sources || []; - let html = ` -
-
-
${total}
-
Total
-
-
-
${valid}
-
Valid
-
-
-
${expired}
-
Expired
-
-
-
${expiringSoon}
-
Expiring Soon
-
-
- `; + let html = ''; - // Certificate authorities using existing array display - if (issuers.length > 0) { - html += ` -
- Certificate Authorities: - -
- `; - - issuers.forEach(issuer => { - html += `
${this.escapeHtml(issuer)}
`; - }); - - html += '
'; - } + // Show what attribute is being correlated (raw names) + const primarySource = metadata.primary_source || 'unknown'; - return html; - } - - generateLargeEntityDetails(node) { - const attributes = node.attributes || {}; - const nodes = attributes.nodes || []; - const nodeType = attributes.node_type || 'nodes'; - - let html = ` + html += ` - - '; - - return html; - } - - generateCorrelationDetails(node) { - const metadata = node.metadata || {}; - const values = metadata.values || []; - const sources = metadata.sources || []; - const mergeCount = metadata.merge_count || 1; - - let html = ''; - - // Correlation values section with meaningful labels - reuses existing modal structure - html += ` - '; - - // Correlated nodes section - reuses existing relationship display - const correlatedNodes = metadata.correlated_nodes || []; + // Show the correlated nodes if (correlatedNodes.length > 0) { html += `