36 Commits

Author SHA1 Message Date
9285226cbc Merge pull request 'new-scheduler' (#1) from new-scheduler into main
Reviewed-on: mstoeck3/dnsrecon#1
2025-09-15 20:45:47 +00:00
overcuriousity
350055fcec successfully implemented scheduler 2025-09-15 22:44:38 +02:00
overcuriousity
4a5ecf7a37 new highest-priority-first scheduler 2025-09-15 22:21:17 +02:00
overcuriousity
71b2855d01 fixes to iteration context menu 2025-09-15 21:37:19 +02:00
overcuriousity
93a258170a context menu option 2025-09-15 21:27:21 +02:00
overcuriousity
e2d4e12057 also allow ip lookups in scan 2025-09-15 21:00:57 +02:00
overcuriousity
c076ee028f main page refinement 2025-09-15 20:44:45 +02:00
overcuriousity
cbfac0922a fix node modals 2025-09-15 20:27:43 +02:00
overcuriousity
881f7b74e5 fix graph delete&revert 2025-09-15 20:20:15 +02:00
overcuriousity
c347581a6c fix graph trueRoot 2025-09-15 20:11:40 +02:00
overcuriousity
30ee21f087 revert graph.js refactor 2025-09-15 18:06:11 +02:00
overcuriousity
2496ca26a5 small fixes 2025-09-15 17:52:09 +02:00
overcuriousity
8aa3c4933e fix large entity 2025-09-15 14:12:02 +02:00
overcuriousity
fc326a66c8 fix large entity 2025-09-15 13:58:30 +02:00
overcuriousity
51902e3155 it 2025-09-15 13:35:58 +02:00
overcuriousity
a261d706c8 update style 2025-09-15 01:10:28 +02:00
overcuriousity
2410e689b8 visual enhancements 2025-09-15 00:25:27 +02:00
overcuriousity
62470673fe integrate checkbox filters 2025-09-14 23:54:27 +02:00
overcuriousity
2658bd148b context menu 2025-09-14 23:42:45 +02:00
overcuriousity
f02381910d Merge branch 'main' of https://git.cc24.dev/mstoeck3/dnsrecon 2025-09-14 23:10:16 +02:00
overcuriousity
674ac59c98 iteration 2025-09-14 23:09:38 +02:00
434d1f4803 dump.rdb gelöscht 2025-09-14 20:55:33 +00:00
overcuriousity
eb9eea127b it 2025-09-14 22:37:23 +02:00
overcuriousity
ae07635ab6 update edge labels 2025-09-14 20:50:09 +02:00
overcuriousity
d7adf9ad8b it 2025-09-14 20:22:09 +02:00
overcuriousity
39ce0e9d11 great progress 2025-09-14 19:12:12 +02:00
overcuriousity
926f9e1096 fixes 2025-09-14 19:06:20 +02:00
overcuriousity
9499e62ccc it 2025-09-14 18:45:02 +02:00
overcuriousity
89ae06482e it 2025-09-14 18:02:15 +02:00
overcuriousity
7fe7ca41ba it 2025-09-14 17:40:18 +02:00
overcuriousity
949fbdbb45 itteration 2025-09-14 17:18:56 +02:00
overcuriousity
689e8c00d4 unify config 2025-09-14 16:17:26 +02:00
overcuriousity
3511f18f9a it 2025-09-14 16:07:58 +02:00
overcuriousity
72f7056bc7 it 2025-09-14 15:31:18 +02:00
overcuriousity
2ae33bc5ba it 2025-09-14 15:00:00 +02:00
overcuriousity
c91913fa13 it 2025-09-14 14:28:04 +02:00
24 changed files with 4662 additions and 1795 deletions

34
.env.example Normal file
View File

@@ -0,0 +1,34 @@
# ===============================================
# DNSRecon Environment Variables
# ===============================================
# Copy this file to .env and fill in your values.
# --- API Keys ---
# Add your Shodan API key for the Shodan provider to be enabled.
SHODAN_API_KEY=
# --- Flask & Session Settings ---
# A strong, random secret key is crucial for session security.
FLASK_SECRET_KEY=your-very-secret-and-random-key-here
FLASK_HOST=127.0.0.1
FLASK_PORT=5000
FLASK_DEBUG=True
# How long a user's session in the browser lasts (in hours).
FLASK_PERMANENT_SESSION_LIFETIME_HOURS=2
# How long inactive scanner data is stored in Redis (in minutes).
SESSION_TIMEOUT_MINUTES=60
# --- Application Core Settings ---
# The default number of levels to recurse when scanning.
DEFAULT_RECURSION_DEPTH=2
# Default timeout for provider API requests in seconds.
DEFAULT_TIMEOUT=30
# The number of concurrent provider requests to make.
MAX_CONCURRENT_REQUESTS=5
# The number of results from a provider that triggers the "large entity" grouping.
LARGE_ENTITY_THRESHOLD=100
# The number of times to retry a target if a provider fails.
MAX_RETRIES_PER_TARGET=8
# How long cached provider responses are stored (in hours).
CACHE_EXPIRY_HOURS=12

1
.gitignore vendored
View File

@@ -169,3 +169,4 @@ cython_debug/
#.idea/
dump.rdb
cache/

315
app.py
View File

@@ -1,3 +1,5 @@
# dnsrecon-reduced/app.py
"""
Flask application entry point for DNSRecon web interface.
Provides REST API endpoints and serves the web interface with user session support.
@@ -11,51 +13,41 @@ import io
from core.session_manager import session_manager
from config import config
from core.graph_manager import NodeType
from utils.helpers import is_valid_target
app = Flask(__name__)
app.config['SECRET_KEY'] = 'dnsrecon-dev-key-change-in-production'
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=2) # 2 hour session lifetime
# Use centralized configuration for Flask settings
app.config['SECRET_KEY'] = config.flask_secret_key
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=config.flask_permanent_session_lifetime_hours)
def get_user_scanner():
"""
User scanner retrieval with better error handling and debugging.
Retrieves the scanner for the current session, or creates a new
session and scanner if one doesn't exist.
"""
# Get current Flask session info for debugging
current_flask_session_id = session.get('dnsrecon_session_id')
client_ip = request.remote_addr
user_agent = request.headers.get('User-Agent', '')[:100] # Truncate for logging
# Try to get existing session
if current_flask_session_id:
existing_scanner = session_manager.get_session(current_flask_session_id)
if existing_scanner:
# Ensure session ID is set
existing_scanner.session_id = current_flask_session_id
return current_flask_session_id, existing_scanner
else:
print(f"Session {current_flask_session_id} not found in session manager")
# Create new session
print("Creating new session...")
# Create new session if none exists
print("Creating new session as none was found...")
new_session_id = session_manager.create_session()
new_scanner = session_manager.get_session(new_session_id)
if not new_scanner:
print(f"ERROR: Failed to retrieve newly created session {new_session_id}")
raise Exception("Failed to create new scanner session")
# Store in Flask session
session['dnsrecon_session_id'] = new_session_id
session.permanent = True
# Ensure session ID is set on scanner
new_scanner.session_id = new_session_id
print(f"Created new session: {new_session_id}")
print(f"New scanner status: {new_scanner.status}")
print("=== END SESSION DEBUG ===")
return new_session_id, new_scanner
@app.route('/')
@@ -67,101 +59,71 @@ def index():
@app.route('/api/scan/start', methods=['POST'])
def start_scan():
"""
Start a new reconnaissance scan with immediate GUI feedback.
Start a new reconnaissance scan. Creates a new isolated scanner if
clear_graph is true, otherwise adds to the existing one.
"""
print("=== API: /api/scan/start called ===")
try:
print("Getting JSON data from request...")
data = request.get_json()
print(f"Request data: {data}")
if not data or 'target' not in data:
return jsonify({'success': False, 'error': 'Missing target in request'}), 400
if not data or 'target_domain' not in data:
print("ERROR: Missing target_domain in request")
return jsonify({
'success': False,
'error': 'Missing target_domain in request'
}), 400
target_domain = data['target_domain'].strip()
target = data['target'].strip()
max_depth = data.get('max_depth', config.default_recursion_depth)
clear_graph = data.get('clear_graph', True)
force_rescan_target = data.get('force_rescan_target', None) # **FIX**: Get the new parameter
print(f"Parsed - target_domain: '{target_domain}', max_depth: {max_depth}")
print(f"Parsed - target: '{target}', max_depth: {max_depth}, clear_graph: {clear_graph}, force_rescan: {force_rescan_target}")
# Validation
if not target_domain:
print("ERROR: Target domain cannot be empty")
return jsonify({
'success': False,
'error': 'Target domain cannot be empty'
}), 400
if not target:
return jsonify({'success': False, 'error': 'Target cannot be empty'}), 400
if not is_valid_target(target):
return jsonify({'success': False, 'error': 'Invalid target format. Please enter a valid domain or IP address.'}), 400
if not isinstance(max_depth, int) or not 1 <= max_depth <= 5:
return jsonify({'success': False, 'error': 'Max depth must be an integer between 1 and 5'}), 400
if not isinstance(max_depth, int) or max_depth < 1 or max_depth > 5:
print(f"ERROR: Invalid max_depth: {max_depth}")
return jsonify({
'success': False,
'error': 'Max depth must be an integer between 1 and 5'
}), 400
user_session_id, scanner = None, None
print("Validation passed, getting user scanner...")
if clear_graph:
print("Clear graph requested: Creating a new, isolated scanner session.")
old_session_id = session.get('dnsrecon_session_id')
if old_session_id:
session_manager.terminate_session(old_session_id)
# Get user-specific scanner
user_session_id, scanner = get_user_scanner()
user_session_id = session_manager.create_session()
session['dnsrecon_session_id'] = user_session_id
session.permanent = True
scanner = session_manager.get_session(user_session_id)
else:
print("Adding to existing graph: Reusing the current scanner session.")
user_session_id, scanner = get_user_scanner()
# Ensure session ID is properly set
if not scanner.session_id:
scanner.session_id = user_session_id
if not scanner:
return jsonify({'success': False, 'error': 'Failed to get or create a scanner instance.'}), 500
print(f"Using session: {user_session_id}")
print(f"Scanner object ID: {id(scanner)}")
print(f"Using scanner {id(scanner)} in session {user_session_id}")
# Start scan
print(f"Calling start_scan on scanner {id(scanner)}...")
success = scanner.start_scan(target_domain, max_depth, clear_graph=clear_graph)
# Immediately update session state regardless of success
session_manager.update_session_scanner(user_session_id, scanner)
success = scanner.start_scan(target, max_depth, clear_graph=clear_graph, force_rescan_target=force_rescan_target) # **FIX**: Pass the new parameter
if success:
scan_session_id = scanner.logger.session_id
print(f"Scan started successfully with scan session ID: {scan_session_id}")
return jsonify({
'success': True,
'message': 'Scan started successfully',
'scan_id': scan_session_id,
'scan_id': scanner.logger.session_id,
'user_session_id': user_session_id,
'scanner_status': scanner.status,
'debug_info': {
'scanner_object_id': id(scanner),
'scanner_status': scanner.status
}
})
else:
print("ERROR: Scanner returned False")
# Provide more detailed error information
error_details = {
'scanner_status': scanner.status,
'scanner_object_id': id(scanner),
'session_id': user_session_id,
'providers_count': len(scanner.providers) if hasattr(scanner, 'providers') else 0
}
return jsonify({
'success': False,
'error': f'Failed to start scan (scanner status: {scanner.status})',
'debug_info': error_details
}), 409
except Exception as e:
print(f"ERROR: Exception in start_scan endpoint: {e}")
traceback.print_exc()
return jsonify({
'success': False,
'error': f'Internal server error: {str(e)}'
}), 500
return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500
@app.route('/api/scan/stop', methods=['POST'])
def stop_scan():
@@ -321,6 +283,81 @@ def get_graph_data():
}), 500
@app.route('/api/graph/node/<node_id>', methods=['DELETE'])
def delete_graph_node(node_id):
"""Delete a node from the graph for the current user session."""
try:
user_session_id, scanner = get_user_scanner()
if not scanner:
return jsonify({'success': False, 'error': 'No active session found'}), 404
success = scanner.graph.remove_node(node_id)
if success:
# Persist the change
session_manager.update_session_scanner(user_session_id, scanner)
return jsonify({'success': True, 'message': f'Node {node_id} deleted successfully.'})
else:
return jsonify({'success': False, 'error': f'Node {node_id} not found in graph.'}), 404
except Exception as e:
print(f"ERROR: Exception in delete_graph_node endpoint: {e}")
traceback.print_exc()
return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500
@app.route('/api/graph/revert', methods=['POST'])
def revert_graph_action():
"""Reverts a graph action, such as re-adding a deleted node."""
try:
data = request.get_json()
if not data or 'type' not in data or 'data' not in data:
return jsonify({'success': False, 'error': 'Invalid revert request format'}), 400
user_session_id, scanner = get_user_scanner()
if not scanner:
return jsonify({'success': False, 'error': 'No active session found'}), 404
action_type = data['type']
action_data = data['data']
if action_type == 'delete':
# Re-add the node
node_to_add = action_data.get('node')
if node_to_add:
scanner.graph.add_node(
node_id=node_to_add['id'],
node_type=NodeType(node_to_add['type']),
attributes=node_to_add.get('attributes'),
description=node_to_add.get('description'),
metadata=node_to_add.get('metadata')
)
# Re-add the edges
edges_to_add = action_data.get('edges', [])
for edge in edges_to_add:
# Add edge only if both nodes exist to prevent errors
if scanner.graph.graph.has_node(edge['from']) and scanner.graph.graph.has_node(edge['to']):
scanner.graph.add_edge(
source_id=edge['from'],
target_id=edge['to'],
relationship_type=edge['metadata']['relationship_type'],
confidence_score=edge['metadata']['confidence_score'],
source_provider=edge['metadata']['source_provider'],
raw_data=edge.get('raw_data', {})
)
# Persist the change
session_manager.update_session_scanner(user_session_id, scanner)
return jsonify({'success': True, 'message': 'Delete action reverted successfully.'})
return jsonify({'success': False, 'error': f'Unknown revert action type: {action_type}'}), 400
except Exception as e:
print(f"ERROR: Exception in revert_graph_action endpoint: {e}")
traceback.print_exc()
return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500
@app.route('/api/export', methods=['GET'])
def export_results():
@@ -367,12 +404,19 @@ def export_results():
@app.route('/api/providers', methods=['GET'])
def get_providers():
"""Get information about available providers for the user session."""
print("=== API: /api/providers called ===")
try:
# Get user-specific scanner
user_session_id, scanner = get_user_scanner()
if scanner:
# Updated debug print to be consistent with the new progress bar logic
completed_tasks = scanner.indicators_completed
total_tasks = scanner.total_tasks_ever_enqueued
print(f"DEBUG: Task Progress - Completed: {completed_tasks}, Total Enqueued: {total_tasks}")
else:
print("DEBUG: No active scanner session found.")
provider_info = scanner.get_provider_info()
return jsonify({
@@ -447,113 +491,6 @@ def set_api_keys():
'error': f'Internal server error: {str(e)}'
}), 500
@app.route('/api/session/info', methods=['GET'])
def get_session_info():
"""Get information about the current user session."""
try:
user_session_id, scanner = get_user_scanner()
session_info = session_manager.get_session_info(user_session_id)
return jsonify({
'success': True,
'session_info': session_info
})
except Exception as e:
print(f"ERROR: Exception in get_session_info endpoint: {e}")
traceback.print_exc()
return jsonify({
'success': False,
'error': f'Internal server error: {str(e)}'
}), 500
@app.route('/api/session/terminate', methods=['POST'])
def terminate_session():
"""Terminate the current user session."""
try:
user_session_id = session.get('dnsrecon_session_id')
if user_session_id:
success = session_manager.terminate_session(user_session_id)
# Clear Flask session
session.pop('dnsrecon_session_id', None)
return jsonify({
'success': success,
'message': 'Session terminated' if success else 'Session not found'
})
else:
return jsonify({
'success': False,
'error': 'No active session to terminate'
}), 400
except Exception as e:
print(f"ERROR: Exception in terminate_session endpoint: {e}")
traceback.print_exc()
return jsonify({
'success': False,
'error': f'Internal server error: {str(e)}'
}), 500
@app.route('/api/admin/sessions', methods=['GET'])
def list_sessions():
"""Admin endpoint to list all active sessions."""
try:
sessions = session_manager.list_active_sessions()
stats = session_manager.get_statistics()
return jsonify({
'success': True,
'sessions': sessions,
'statistics': stats
})
except Exception as e:
print(f"ERROR: Exception in list_sessions endpoint: {e}")
traceback.print_exc()
return jsonify({
'success': False,
'error': f'Internal server error: {str(e)}'
}), 500
@app.route('/api/health', methods=['GET'])
def health_check():
"""Health check endpoint."""
try:
# Get session stats
session_stats = session_manager.get_statistics()
return jsonify({
'success': True,
'status': 'healthy',
'timestamp': datetime.now(timezone.utc).isoformat(),
'version': '1.0.0-phase2',
'phase': 2,
'features': {
'multi_provider': True,
'concurrent_processing': True,
'real_time_updates': True,
'api_key_management': True,
'visualization': True,
'retry_logic': True,
'user_sessions': True,
'session_isolation': True
},
'session_statistics': session_stats
})
except Exception as e:
print(f"ERROR: Exception in health_check endpoint: {e}")
return jsonify({
'success': False,
'error': f'Health check failed: {str(e)}'
}), 500
@app.errorhandler(404)
def not_found(error):
"""Handle 404 errors."""

1
cache/crtsh/llm_mikoshi_de.json vendored Normal file
View File

@@ -0,0 +1 @@
{"domain":"llm.mikoshi.de","first_cached":"2025-09-15T20:44:11.277759+00:00","last_upstream_query":"2025-09-15T20:44:11.277761+00:00","upstream_query_count":1,"certificates":[{"issuer_ca_id":295814,"issuer_name":"C=US, O=Let's Encrypt, CN=R10","common_name":"llm.mikoshi.de","name_value":"llm.mikoshi.de","id":19995849024,"entry_timestamp":"2025-07-30T00:02:19.725","not_before":"2025-07-29T23:03:47","not_after":"2025-10-27T23:03:46","serial_number":"05335d7df5f076bd039bf6148fe11ebfd86d","result_count":2},{"issuer_ca_id":295814,"issuer_name":"C=US, O=Let's Encrypt, CN=R10","common_name":"llm.mikoshi.de","name_value":"llm.mikoshi.de","id":19995848373,"entry_timestamp":"2025-07-30T00:02:17.527","not_before":"2025-07-29T23:03:47","not_after":"2025-10-27T23:03:46","serial_number":"05335d7df5f076bd039bf6148fe11ebfd86d","result_count":2},{"issuer_ca_id":295815,"issuer_name":"C=US, O=Let's Encrypt, CN=R11","common_name":"llm.mikoshi.de","name_value":"llm.mikoshi.de","id":18720891589,"entry_timestamp":"2025-05-31T00:01:22.576","not_before":"2025-05-30T23:02:49","not_after":"2025-08-28T23:02:48","serial_number":"064967b3c615cbb4a7f8690b4ee7ed3ab2c2","result_count":2},{"issuer_ca_id":295815,"issuer_name":"C=US, O=Let's Encrypt, CN=R11","common_name":"llm.mikoshi.de","name_value":"llm.mikoshi.de","id":18720891502,"entry_timestamp":"2025-05-31T00:01:20.192","not_before":"2025-05-30T23:02:49","not_after":"2025-08-28T23:02:48","serial_number":"064967b3c615cbb4a7f8690b4ee7ed3ab2c2","result_count":2},{"issuer_ca_id":295815,"issuer_name":"C=US, O=Let's Encrypt, CN=R11","common_name":"llm.mikoshi.de","name_value":"llm.mikoshi.de","id":17870870943,"entry_timestamp":"2025-03-31T19:43:16.66","not_before":"2025-03-31T18:44:46","not_after":"2025-06-29T18:44:45","serial_number":"05da28c4ebc6a250cb83e37fa4bc54b85508","result_count":2},{"issuer_ca_id":295815,"issuer_name":"C=US, O=Let's Encrypt, CN=R11","common_name":"llm.mikoshi.de","name_value":"llm.mikoshi.de","id":17539939653,"entry_timestamp":"2025-03-31T19:43:16.406","not_before":"2025-03-31T18:44:46","not_after":"2025-06-29T18:44:45","serial_number":"05da28c4ebc6a250cb83e37fa4bc54b85508","result_count":2}]}

1
cache/crtsh/matrix_mikoshi_de.json vendored Normal file
View File

@@ -0,0 +1 @@
{"domain":"matrix.mikoshi.de","first_cached":"2025-09-15T20:44:08.566365+00:00","last_upstream_query":"2025-09-15T20:44:08.566368+00:00","upstream_query_count":1,"certificates":[{"issuer_ca_id":295814,"issuer_name":"C=US, O=Let's Encrypt, CN=R10","common_name":"matrix.mikoshi.de","name_value":"matrix.mikoshi.de","id":17170805427,"entry_timestamp":"2025-02-13T00:01:24.095","not_before":"2025-02-12T23:02:53","not_after":"2025-05-13T23:02:52","serial_number":"0340c3ca26c1ab1678dd4c8885208ac93818","result_count":2},{"issuer_ca_id":295814,"issuer_name":"C=US, O=Let's Encrypt, CN=R10","common_name":"matrix.mikoshi.de","name_value":"matrix.mikoshi.de","id":16704107881,"entry_timestamp":"2025-02-13T00:01:23.078","not_before":"2025-02-12T23:02:53","not_after":"2025-05-13T23:02:52","serial_number":"0340c3ca26c1ab1678dd4c8885208ac93818","result_count":2},{"issuer_ca_id":295814,"issuer_name":"C=US, O=Let's Encrypt, CN=R10","common_name":"matrix.mikoshi.de","name_value":"matrix.mikoshi.de","id":17122315423,"entry_timestamp":"2025-02-09T15:17:51.214","not_before":"2025-02-09T14:19:20","not_after":"2025-05-10T14:19:19","serial_number":"03e088e499d2b5e10132f28674a00990313a","result_count":2},{"issuer_ca_id":295814,"issuer_name":"C=US, O=Let's Encrypt, CN=R10","common_name":"matrix.mikoshi.de","name_value":"matrix.mikoshi.de","id":16635517598,"entry_timestamp":"2025-02-09T15:17:50.818","not_before":"2025-02-09T14:19:20","not_after":"2025-05-10T14:19:19","serial_number":"03e088e499d2b5e10132f28674a00990313a","result_count":2}]}

137
config.py
View File

@@ -1,3 +1,5 @@
# dnsrecon-reduced/config.py
"""
Configuration management for DNSRecon tool.
Handles API key storage, rate limiting, and default settings.
@@ -5,110 +7,97 @@ Handles API key storage, rate limiting, and default settings.
import os
from typing import Dict, Optional
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class Config:
"""Configuration manager for DNSRecon application."""
def __init__(self):
"""Initialize configuration with default values."""
self.api_keys: Dict[str, Optional[str]] = {
'shodan': None
}
self.api_keys: Dict[str, Optional[str]] = {}
# Default settings
# --- General Settings ---
self.default_recursion_depth = 2
self.default_timeout = 10
self.default_timeout = 30
self.max_concurrent_requests = 5
self.large_entity_threshold = 100
self.max_retries_per_target = 8
self.cache_expiry_hours = 12
# Rate limiting settings (requests per minute)
# --- Provider Caching Settings ---
self.cache_timeout_hours = 6 # Provider-specific cache timeout
# --- Rate Limiting (requests per minute) ---
self.rate_limits = {
'crtsh': 60, # Free service, be respectful
'shodan': 60, # API dependent
'dns': 100 # Local DNS queries
'crtsh': 5,
'shodan': 60,
'dns': 100
}
# Provider settings
# --- Provider Settings ---
self.enabled_providers = {
'crtsh': True, # Always enabled (free)
'dns': True, # Always enabled (free)
'shodan': False # Requires API key
'crtsh': True,
'dns': True,
'shodan': False
}
# Logging configuration
# --- Logging ---
self.log_level = 'INFO'
self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# Flask configuration
# --- Flask & Session Settings ---
self.flask_host = '127.0.0.1'
self.flask_port = 5000
self.flask_debug = True
self.flask_secret_key = 'default-secret-key-change-me'
self.flask_permanent_session_lifetime_hours = 2
self.session_timeout_minutes = 60
def set_api_key(self, provider: str, api_key: str) -> bool:
"""
Set API key for a provider.
Args:
provider: Provider name (shodan, etc)
api_key: API key string
Returns:
bool: True if key was set successfully
"""
if provider in self.api_keys:
self.api_keys[provider] = api_key
self.enabled_providers[provider] = True if api_key else False
return True
return False
def get_api_key(self, provider: str) -> Optional[str]:
"""
Get API key for a provider.
Args:
provider: Provider name
Returns:
API key or None if not set
"""
return self.api_keys.get(provider)
def is_provider_enabled(self, provider: str) -> bool:
"""
Check if a provider is enabled.
Args:
provider: Provider name
Returns:
bool: True if provider is enabled
"""
return self.enabled_providers.get(provider, False)
def get_rate_limit(self, provider: str) -> int:
"""
Get rate limit for a provider.
Args:
provider: Provider name
Returns:
Rate limit in requests per minute
"""
return self.rate_limits.get(provider, 60)
# Load environment variables to override defaults
self.load_from_env()
def load_from_env(self):
"""Load configuration from environment variables."""
if os.getenv('SHODAN_API_KEY'):
self.set_api_key('shodan', os.getenv('SHODAN_API_KEY'))
self.set_api_key('shodan', os.getenv('SHODAN_API_KEY'))
# Override default settings from environment
self.default_recursion_depth = int(os.getenv('DEFAULT_RECURSION_DEPTH', '2'))
self.flask_debug = os.getenv('FLASK_DEBUG', 'True').lower() == 'true'
self.default_timeout = 30
self.max_concurrent_requests = 5
# Override settings from environment
self.default_recursion_depth = int(os.getenv('DEFAULT_RECURSION_DEPTH', self.default_recursion_depth))
self.default_timeout = int(os.getenv('DEFAULT_TIMEOUT', self.default_timeout))
self.max_concurrent_requests = int(os.getenv('MAX_CONCURRENT_REQUESTS', self.max_concurrent_requests))
self.large_entity_threshold = int(os.getenv('LARGE_ENTITY_THRESHOLD', self.large_entity_threshold))
self.max_retries_per_target = int(os.getenv('MAX_RETRIES_PER_TARGET', self.max_retries_per_target))
self.cache_expiry_hours = int(os.getenv('CACHE_EXPIRY_HOURS', self.cache_expiry_hours))
self.cache_timeout_hours = int(os.getenv('CACHE_TIMEOUT_HOURS', self.cache_timeout_hours))
# Override Flask and session settings
self.flask_host = os.getenv('FLASK_HOST', self.flask_host)
self.flask_port = int(os.getenv('FLASK_PORT', self.flask_port))
self.flask_debug = os.getenv('FLASK_DEBUG', str(self.flask_debug)).lower() == 'true'
self.flask_secret_key = os.getenv('FLASK_SECRET_KEY', self.flask_secret_key)
self.flask_permanent_session_lifetime_hours = int(os.getenv('FLASK_PERMANENT_SESSION_LIFETIME_HOURS', self.flask_permanent_session_lifetime_hours))
self.session_timeout_minutes = int(os.getenv('SESSION_TIMEOUT_MINUTES', self.session_timeout_minutes))
def set_api_key(self, provider: str, api_key: Optional[str]) -> bool:
"""Set API key for a provider."""
self.api_keys[provider] = api_key
if api_key:
self.enabled_providers[provider] = True
return True
def get_api_key(self, provider: str) -> Optional[str]:
"""Get API key for a provider."""
return self.api_keys.get(provider)
def is_provider_enabled(self, provider: str) -> bool:
"""Check if a provider is enabled."""
return self.enabled_providers.get(provider, False)
def get_rate_limit(self, provider: str) -> int:
"""Get rate limit for a provider."""
return self.rate_limits.get(provider, 60)
# Global configuration instance
config = Config()

View File

@@ -1,3 +1,5 @@
# core/graph_manager.py
"""
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
@@ -50,21 +52,23 @@ class GraphManager:
self.__dict__.update(state)
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = None):
"""Recursively traverse metadata and add hashable values to the index."""
def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = [], parent_attr: str = ""):
"""Recursively traverse metadata and add hashable values to the index with better path tracking."""
if path is None:
path = []
if isinstance(data, dict):
for key, value in data.items():
self._update_correlation_index(node_id, value, path + [key])
self._update_correlation_index(node_id, value, path + [key], key)
elif isinstance(data, list):
for i, item in enumerate(data):
self._update_correlation_index(node_id, item, path + [f"[{i}]"])
# Instead of just using [i], include the parent attribute context
list_path_component = f"[{i}]" if not parent_attr else f"{parent_attr}[{i}]"
self._update_correlation_index(node_id, item, path + [list_path_component], parent_attr)
else:
self._add_to_correlation_index(node_id, data, ".".join(path))
self._add_to_correlation_index(node_id, data, ".".join(path), parent_attr)
def _add_to_correlation_index(self, node_id: str, value: Any, path_str: str):
def _add_to_correlation_index(self, node_id: str, value: Any, path_str: str, parent_attr: str = ""):
"""Add a hashable value to the correlation index, filtering out noise."""
if not isinstance(value, (str, int, float, bool)) or value is None:
return
@@ -80,8 +84,8 @@ class GraphManager:
return
if len(value) < 4 or value.lower() in ['true', 'false', 'unknown', 'none', 'crt.sh']:
return
elif isinstance(value, int) and abs(value) < 9999:
return # Ignore small integers
elif isinstance(value, int) and (abs(value) < 1024 or abs(value) > 65535):
return # Ignore small integers and common port numbers
elif isinstance(value, bool):
return # Ignore boolean values
@@ -90,10 +94,47 @@ class GraphManager:
self.correlation_index[value] = {}
if node_id not in self.correlation_index[value]:
self.correlation_index[value][node_id] = []
if path_str not in self.correlation_index[value][node_id]:
self.correlation_index[value][node_id].append(path_str)
def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = None) -> List[Dict]:
# Store both the full path and the parent attribute for better edge labeling
correlation_entry = {
'path': path_str,
'parent_attr': parent_attr,
'meaningful_attr': self._extract_meaningful_attribute(path_str, parent_attr)
}
if correlation_entry not in self.correlation_index[value][node_id]:
self.correlation_index[value][node_id].append(correlation_entry)
def _extract_meaningful_attribute(self, path_str: str, parent_attr: str = "") -> str:
"""Extract the most meaningful attribute name from a path string."""
if not path_str:
return "unknown"
path_parts = path_str.split('.')
# Look for the last non-array-index part
for part in reversed(path_parts):
# Skip array indices like [0], [1], etc.
if not (part.startswith('[') and part.endswith(']') and part[1:-1].isdigit()):
# Clean up compound names like "hostnames[0]" to just "hostnames"
clean_part = re.sub(r'\[\d+\]$', '', part)
if clean_part:
return clean_part
# Fallback to parent attribute if available
if parent_attr:
return parent_attr
# Last resort - use the first meaningful part
for part in path_parts:
if not (part.startswith('[') and part.endswith(']') and part[1:-1].isdigit()):
clean_part = re.sub(r'\[\d+\]$', '', part)
if clean_part:
return clean_part
return "correlation"
def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = [], parent_attr: str = "") -> List[Dict]:
"""Recursively traverse metadata to find correlations with existing data."""
if path is None:
path = []
@@ -103,10 +144,11 @@ class GraphManager:
for key, value in data.items():
if key == 'source': # Avoid correlating on the provider name
continue
all_correlations.extend(self._check_for_correlations(new_node_id, value, path + [key]))
all_correlations.extend(self._check_for_correlations(new_node_id, value, path + [key], key))
elif isinstance(data, list):
for i, item in enumerate(data):
all_correlations.extend(self._check_for_correlations(new_node_id, item, path + [f"[{i}]"]))
list_path_component = f"[{i}]" if not parent_attr else f"{parent_attr}[{i}]"
all_correlations.extend(self._check_for_correlations(new_node_id, item, path + [list_path_component], parent_attr))
else:
value = data
if value in self.correlation_index:
@@ -117,11 +159,31 @@ class GraphManager:
if len(unique_nodes) < 2:
return all_correlations # Correlation must involve at least two distinct nodes
new_source = {'node_id': new_node_id, 'path': ".".join(path)}
new_source = {
'node_id': new_node_id,
'path': ".".join(path),
'parent_attr': parent_attr,
'meaningful_attr': self._extract_meaningful_attribute(".".join(path), parent_attr)
}
all_sources = [new_source]
for node_id, paths in existing_nodes_with_paths.items():
for p_str in paths:
all_sources.append({'node_id': node_id, 'path': p_str})
for node_id, path_entries in existing_nodes_with_paths.items():
for entry in path_entries:
if isinstance(entry, dict):
all_sources.append({
'node_id': node_id,
'path': entry['path'],
'parent_attr': entry.get('parent_attr', ''),
'meaningful_attr': entry.get('meaningful_attr', self._extract_meaningful_attribute(entry['path'], entry.get('parent_attr', '')))
})
else:
# Handle legacy string-only entries
all_sources.append({
'node_id': node_id,
'path': str(entry),
'parent_attr': '',
'meaningful_attr': self._extract_meaningful_attribute(str(entry))
})
all_correlations.append({
'value': value,
@@ -163,8 +225,7 @@ class GraphManager:
# Skip creating correlation node - would be redundant
continue
# STEP 2: Filter out node pairs that already have direct edges
eligible_nodes = self._filter_nodes_without_direct_edges(set(corr['nodes']))
eligible_nodes = set(corr['nodes'])
if len(eligible_nodes) < 2:
# Need at least 2 nodes to create a correlation
@@ -184,11 +245,12 @@ class GraphManager:
metadata={'values': [value], 'sources': corr['sources'],
'correlated_nodes': list(eligible_nodes)})
# Create edges from eligible nodes to this correlation node
# Create edges from eligible nodes to this correlation node with better labeling
for c_node_id in eligible_nodes:
if self.graph.has_node(c_node_id):
attribute = corr['sources'][0]['path'].split('.')[-1]
relationship_type = f"c_{attribute}"
# Find the best attribute name for this node
meaningful_attr = self._find_best_attribute_name_for_node(c_node_id, corr['sources'])
relationship_type = f"c_{meaningful_attr}"
self.add_edge(c_node_id, correlation_node_id, relationship_type, confidence_score=0.9)
self._update_correlation_index(node_id, attributes)
@@ -196,27 +258,34 @@ class GraphManager:
self.last_modified = datetime.now(timezone.utc).isoformat()
return is_new_node
def _filter_nodes_without_direct_edges(self, node_set: set) -> set:
"""
Filter out nodes that already have direct edges between them.
Returns set of nodes that should be included in correlation.
"""
nodes_list = list(node_set)
eligible_nodes = set(node_set) # Start with all nodes
def _find_best_attribute_name_for_node(self, node_id: str, sources: List[Dict]) -> str:
"""Find the best attribute name for a correlation edge by looking at the sources."""
node_sources = [s for s in sources if s['node_id'] == node_id]
# Check all pairs of nodes
for i in range(len(nodes_list)):
for j in range(i + 1, len(nodes_list)):
node_a = nodes_list[i]
node_b = nodes_list[j]
if not node_sources:
return "correlation"
# Check if direct edge exists in either direction
if self._has_direct_edge_bidirectional(node_a, node_b):
# Remove both nodes from eligible set since they're already connected
eligible_nodes.discard(node_a)
eligible_nodes.discard(node_b)
# Use the meaningful_attr if available
for source in node_sources:
meaningful_attr = source.get('meaningful_attr')
if meaningful_attr and meaningful_attr != "unknown":
return meaningful_attr
return eligible_nodes
# Fallback to parent_attr
for source in node_sources:
parent_attr = source.get('parent_attr')
if parent_attr:
return parent_attr
# Last resort - extract from path
for source in node_sources:
path = source.get('path', '')
if path:
extracted = self._extract_meaningful_attribute(path)
if extracted != "unknown":
return extracted
return "correlation"
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
"""
@@ -290,7 +359,7 @@ class GraphManager:
# Create set of unique sources based on (node_id, path) tuples
source_set = set()
for source in existing_sources + new_sources:
source_tuple = (source['node_id'], source['path'])
source_tuple = (source['node_id'], source.get('path', ''))
source_set.add(source_tuple)
# Convert back to list of dictionaries
@@ -345,6 +414,29 @@ class GraphManager:
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def remove_node(self, node_id: str) -> bool:
"""Remove a node and its connected edges from the graph."""
if not self.graph.has_node(node_id):
return False
# Remove node from the graph (NetworkX handles removing connected edges)
self.graph.remove_node(node_id)
# Clean up the correlation index
keys_to_delete = []
for value, nodes in self.correlation_index.items():
if node_id in nodes:
del nodes[node_id]
if not nodes: # If no other nodes are associated with this value, remove it
keys_to_delete.append(value)
for key in keys_to_delete:
if key in self.correlation_index:
del self.correlation_index[key]
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def get_node_count(self) -> int:
"""Get total number of nodes in the graph."""
return self.graph.number_of_nodes()
@@ -421,10 +513,14 @@ class GraphManager:
def _get_confidence_distribution(self) -> Dict[str, int]:
"""Get distribution of edge confidence scores."""
distribution = {'high': 0, 'medium': 0, 'low': 0}
for _, _, confidence in self.graph.edges(data='confidence_score', default=0):
if confidence >= 0.8: distribution['high'] += 1
elif confidence >= 0.6: distribution['medium'] += 1
else: distribution['low'] += 1
for _, _, data in self.graph.edges(data=True):
confidence = data.get('confidence_score', 0)
if confidence >= 0.8:
distribution['high'] += 1
elif confidence >= 0.6:
distribution['medium'] += 1
else:
distribution['low'] += 1
return distribution
def get_statistics(self) -> Dict[str, Any]:
@@ -439,9 +535,10 @@ class GraphManager:
# Calculate distributions
for node_type in NodeType:
stats['node_type_distribution'][node_type.value] = self.get_nodes_by_type(node_type).__len__()
for _, _, rel_type in self.graph.edges(data='relationship_type', default='unknown'):
for _, _, data in self.graph.edges(data=True):
rel_type = data.get('relationship_type', 'unknown')
stats['relationship_type_distribution'][rel_type] = stats['relationship_type_distribution'].get(rel_type, 0) + 1
for _, _, provider in self.graph.edges(data='source_provider', default='unknown'):
provider = data.get('source_provider', 'unknown')
stats['provider_distribution'][provider] = stats['provider_distribution'].get(provider, 0) + 1
return stats

View File

@@ -42,7 +42,7 @@ class ForensicLogger:
Maintains detailed audit trail of all reconnaissance activities.
"""
def __init__(self, session_id: str = None):
def __init__(self, session_id: str = ""):
"""
Initialize forensic logger.
@@ -50,7 +50,7 @@ class ForensicLogger:
session_id: Unique identifier for this reconnaissance session
"""
self.session_id = session_id or self._generate_session_id()
#self.lock = threading.Lock()
self.lock = threading.Lock()
# Initialize audit trail storage
self.api_requests: List[APIRequest] = []
@@ -86,6 +86,8 @@ class ForensicLogger:
# Remove the unpickleable 'logger' attribute
if 'logger' in state:
del state['logger']
if 'lock' in state:
del state['lock']
return state
def __setstate__(self, state):
@@ -101,6 +103,7 @@ class ForensicLogger:
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)
self.lock = threading.Lock()
def _generate_session_id(self) -> str:
"""Generate unique session identifier."""
@@ -203,8 +206,6 @@ class ForensicLogger:
self.session_metadata['target_domains'] = list(self.session_metadata['target_domains'])
self.logger.info(f"Scan Complete - Session: {self.session_id}")
self.logger.info(f"Total API Requests: {self.session_metadata['total_requests']}")
self.logger.info(f"Total Relationships: {self.session_metadata['total_relationships']}")
def export_audit_trail(self) -> Dict[str, Any]:
"""

29
core/rate_limiter.py Normal file
View File

@@ -0,0 +1,29 @@
# dnsrecon-reduced/core/rate_limiter.py
import time
import redis
class GlobalRateLimiter:
def __init__(self, redis_client):
self.redis = redis_client
def is_rate_limited(self, key, limit, period):
"""
Check if a key is rate-limited.
"""
now = time.time()
key = f"rate_limit:{key}"
# Remove old timestamps
self.redis.zremrangebyscore(key, 0, now - period)
# Check the count
count = self.redis.zcard(key)
if count >= limit:
return True
# Add new timestamp
self.redis.zadd(key, {now: now})
self.redis.expire(key, period)
return False

View File

@@ -5,16 +5,18 @@ import traceback
import time
import os
import importlib
from typing import List, Set, Dict, Any, Tuple
import redis
from typing import List, Set, Dict, Any, Tuple, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future
from collections import defaultdict, deque
from collections import defaultdict
from queue import PriorityQueue
from datetime import datetime, timezone
from core.graph_manager import GraphManager, NodeType
from core.logger import get_forensic_logger, new_session
from utils.helpers import _is_valid_ip, _is_valid_domain
from providers.base_provider import BaseProvider
from core.rate_limiter import GlobalRateLimiter
class ScanStatus:
"""Enumeration of scan statuses."""
@@ -49,11 +51,21 @@ class Scanner:
self.max_depth = 2
self.stop_event = threading.Event()
self.scan_thread = None
self.session_id = None # Will be set by session manager
self.session_id: Optional[str] = None # Will be set by session manager
self.task_queue = PriorityQueue()
self.target_retries = defaultdict(int)
self.scan_failed_due_to_retries = False
# **NEW**: Track currently processing tasks to prevent processing after stop
self.currently_processing = set()
self.processing_lock = threading.Lock()
# Scanning progress tracking
self.total_indicators_found = 0
self.indicators_processed = 0
self.indicators_completed = 0
self.tasks_re_enqueued = 0
self.total_tasks_ever_enqueued = 0
self.current_indicator = ""
# Concurrent processing configuration
@@ -68,6 +80,9 @@ class Scanner:
print("Initializing forensic logger...")
self.logger = get_forensic_logger()
# Initialize global rate limiter
self.rate_limiter = GlobalRateLimiter(redis.StrictRedis(db=0))
print("Scanner initialization complete")
except Exception as e:
@@ -119,7 +134,11 @@ class Scanner:
unpicklable_attrs = [
'stop_event',
'scan_thread',
'executor'
'executor',
'processing_lock', # **NEW**: Exclude the processing lock
'task_queue', # PriorityQueue is not picklable
'rate_limiter',
'logger'
]
for attr in unpicklable_attrs:
@@ -143,6 +162,14 @@ class Scanner:
self.stop_event = threading.Event()
self.scan_thread = None
self.executor = None
self.processing_lock = threading.Lock() # **NEW**: Recreate processing lock
self.task_queue = PriorityQueue()
self.rate_limiter = GlobalRateLimiter(redis.StrictRedis(db=0))
self.logger = get_forensic_logger()
# **NEW**: Reset processing tracking
if not hasattr(self, 'currently_processing'):
self.currently_processing = set()
# Re-set stop events for providers
if hasattr(self, 'providers'):
@@ -165,9 +192,10 @@ class Scanner:
attribute = getattr(module, attribute_name)
if isinstance(attribute, type) and issubclass(attribute, BaseProvider) and attribute is not BaseProvider:
provider_class = attribute
provider_name = provider_class(session_config=self.config).get_name()
provider = provider_class(name=attribute_name, session_config=self.config)
provider_name = provider.get_name()
if self.config.is_provider_enabled(provider_name):
provider = provider_class(session_config=self.config)
if provider.is_available():
provider.set_stop_event(self.stop_event)
self.providers.append(provider)
@@ -188,29 +216,61 @@ class Scanner:
self._initialize_providers()
print("Session configuration updated")
def start_scan(self, target_domain: str, max_depth: int = 2, clear_graph: bool = True) -> bool:
"""Start a new reconnaissance scan with immediate GUI feedback."""
def start_scan(self, target: str, max_depth: int = 2, clear_graph: bool = True, force_rescan_target: Optional[str] = None) -> bool:
"""Start a new reconnaissance scan with proper cleanup of previous scans."""
print(f"=== STARTING SCAN IN SCANNER {id(self)} ===")
print(f"Session ID: {self.session_id}")
print(f"Initial scanner status: {self.status}")
self.total_tasks_ever_enqueued = 0
# Clean up previous scan thread if needed
# **IMPROVED**: More aggressive cleanup of previous scan
if self.scan_thread and self.scan_thread.is_alive():
print("A previous scan thread is still alive. Sending termination signal and waiting...")
self.stop_scan()
self.scan_thread.join(10.0)
print("A previous scan thread is still alive. Forcing termination...")
# Set stop signals immediately
self._set_stop_signal()
self.status = ScanStatus.STOPPED
# Clear all processing state
with self.processing_lock:
self.currently_processing.clear()
self.task_queue = PriorityQueue()
# Shutdown executor aggressively
if self.executor:
print("Shutting down executor forcefully...")
self.executor.shutdown(wait=False, cancel_futures=True)
self.executor = None
# Wait for thread termination with shorter timeout
print("Waiting for previous scan thread to terminate...")
self.scan_thread.join(5.0) # Reduced from 10 seconds
if self.scan_thread.is_alive():
print("ERROR: The previous scan thread is unresponsive and could not be stopped.")
self.status = ScanStatus.FAILED
self._update_session_state()
return False
print("Previous scan thread terminated successfully.")
print("WARNING: Previous scan thread is still alive after 5 seconds")
# Continue anyway, but log the issue
self.logger.logger.warning("Previous scan thread failed to terminate cleanly")
# Reset state for new scan
# Reset state for new scan with proper forensic logging
print("Resetting scanner state for new scan...")
self.status = ScanStatus.IDLE
self._update_session_state() # Update GUI immediately
print("Scanner state is now clean for a new scan.")
self.stop_event.clear()
# **NEW**: Clear Redis stop signal explicitly
if self.session_id:
from core.session_manager import session_manager
session_manager.clear_stop_signal(self.session_id)
with self.processing_lock:
self.currently_processing.clear()
self.task_queue = PriorityQueue()
self.target_retries.clear()
self.scan_failed_due_to_retries = False
# Update session state immediately for GUI feedback
self._update_session_state()
print("Scanner state reset complete.")
try:
if not hasattr(self, 'providers') or not self.providers:
@@ -221,28 +281,31 @@ class Scanner:
if clear_graph:
self.graph.clear()
self.current_target = target_domain.lower().strip()
if force_rescan_target and self.graph.graph.has_node(force_rescan_target):
print(f"Forcing rescan of {force_rescan_target}, clearing provider states.")
node_data = self.graph.graph.nodes[force_rescan_target]
if 'metadata' in node_data and 'provider_states' in node_data['metadata']:
node_data['metadata']['provider_states'] = {}
self.current_target = target.lower().strip()
self.max_depth = max_depth
self.current_depth = 0
# Clear both local and Redis stop signals
self.stop_event.clear()
if self.session_id:
from core.session_manager import session_manager
session_manager.clear_stop_signal(self.session_id)
self.total_indicators_found = 0
self.indicators_processed = 0
self.indicators_completed = 0
self.tasks_re_enqueued = 0
self.current_indicator = self.current_target
# Update GUI with scan preparation
# Update GUI with scan preparation state
self._update_session_state()
# Start new forensic session
print(f"Starting new forensic session for scanner {id(self)}...")
self.logger = new_session()
# Start scan in separate thread
# Start scan in a separate thread
print(f"Starting scan thread for scanner {id(self)}...")
self.scan_thread = threading.Thread(
target=self._execute_scan,
@@ -258,54 +321,124 @@ class Scanner:
print(f"ERROR: Exception in start_scan for scanner {id(self)}: {e}")
traceback.print_exc()
self.status = ScanStatus.FAILED
self._update_session_state() # Update failed status immediately
self._update_session_state()
return False
def _execute_scan(self, target_domain: str, max_depth: int) -> None:
"""Execute the reconnaissance scan using a task queue-based approach."""
print(f"_execute_scan started for {target_domain} with depth {max_depth}")
self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
processed_targets = set()
def _get_priority(self, provider_name):
rate_limit = self.config.get_rate_limit(provider_name)
if rate_limit > 90:
return 1 # Highest priority
elif rate_limit > 50:
return 2
else:
return 3 # Lowest priority
task_queue = deque([(target_domain, 0, False)]) # target, depth, is_large_entity_member
def _execute_scan(self, target: str, max_depth: int) -> None:
"""Execute the reconnaissance scan with proper termination handling."""
print(f"_execute_scan started for {target} with depth {max_depth}")
self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
processed_tasks = set()
# Initial task population for the main target
is_ip = _is_valid_ip(target)
initial_providers = self._get_eligible_providers(target, is_ip, False)
for provider in initial_providers:
provider_name = provider.get_name()
self.task_queue.put((self._get_priority(provider_name), (provider_name, target, 0)))
self.total_tasks_ever_enqueued += 1 # <<< FIX: INCREMENT HERE
try:
self.status = ScanStatus.RUNNING
self._update_session_state()
enabled_providers = [provider.get_name() for provider in self.providers]
self.logger.log_scan_start(target_domain, max_depth, enabled_providers)
self.graph.add_node(target_domain, NodeType.DOMAIN)
self._initialize_provider_states(target_domain)
self.logger.log_scan_start(target, max_depth, enabled_providers)
while task_queue:
if self._is_stop_requested():
print("Stop requested, terminating scan.")
# Determine initial node type
node_type = NodeType.IP if is_ip else NodeType.DOMAIN
self.graph.add_node(target, node_type)
self._initialize_provider_states(target)
# Better termination checking in main loop
while not self.task_queue.empty() and not self._is_stop_requested():
try:
priority, (provider_name, target_item, depth) = self.task_queue.get()
except IndexError:
# Queue became empty during processing
break
target, depth, is_large_entity_member = task_queue.popleft()
if target in processed_targets:
task_tuple = (provider_name, target_item)
if task_tuple in processed_tasks:
continue
if depth > max_depth:
continue
self.current_depth = depth
self.current_indicator = target
self._update_session_state()
if self.rate_limiter.is_rate_limited(provider_name, self.config.get_rate_limit(provider_name), 60):
self.task_queue.put((priority + 1, (provider_name, target_item, depth))) # Postpone
continue
new_targets, large_entity_members = self._query_providers_for_target(target, depth, is_large_entity_member)
processed_targets.add(target)
with self.processing_lock:
if self._is_stop_requested():
print(f"Stop requested before processing {target_item}")
break
self.currently_processing.add(target_item)
for new_target in new_targets:
if new_target not in processed_targets:
task_queue.append((new_target, depth + 1, False))
try:
self.current_depth = depth
self.current_indicator = target_item
self._update_session_state()
for member in large_entity_members:
if member not in processed_targets:
task_queue.append((member, depth, True))
if self._is_stop_requested():
print(f"Stop requested during processing setup for {target_item}")
break
provider = next((p for p in self.providers if p.get_name() == provider_name), None)
if provider:
new_targets, large_entity_members, success = self._query_single_provider_for_target(provider, target_item, depth)
if self._is_stop_requested():
print(f"Stop requested after querying providers for {target_item}")
break
if not success:
self.target_retries[task_tuple] += 1
if self.target_retries[task_tuple] <= self.config.max_retries_per_target:
print(f"Re-queueing task {task_tuple} (attempt {self.target_retries[task_tuple]})")
self.task_queue.put((priority, (provider_name, target_item, depth)))
self.tasks_re_enqueued += 1
self.total_tasks_ever_enqueued += 1 # <<< FIX: INCREMENT HERE
else:
print(f"ERROR: Max retries exceeded for task {task_tuple}")
self.scan_failed_due_to_retries = True
self._log_target_processing_error(str(task_tuple), "Max retries exceeded")
else:
processed_tasks.add(task_tuple)
self.indicators_completed += 1
if not self._is_stop_requested():
all_new_targets = new_targets.union(large_entity_members)
for new_target in all_new_targets:
is_ip_new = _is_valid_ip(new_target)
eligible_providers_new = self._get_eligible_providers(new_target, is_ip_new, False)
for p_new in eligible_providers_new:
p_name_new = p_new.get_name()
if (p_name_new, new_target) not in processed_tasks:
new_depth = depth + 1 if new_target in new_targets else depth
self.task_queue.put((self._get_priority(p_name_new), (p_name_new, new_target, new_depth)))
self.total_tasks_ever_enqueued += 1 # <<< FIX: INCREMENT HERE
finally:
with self.processing_lock:
self.currently_processing.discard(target_item)
if self._is_stop_requested():
print("Scan terminated due to stop request")
self.logger.logger.info("Scan terminated by user request")
elif self.task_queue.empty():
print("Scan completed - no more targets to process")
self.logger.logger.info("Scan completed - all targets processed")
except Exception as e:
print(f"ERROR: Scan execution failed with error: {e}")
@@ -313,8 +446,13 @@ class Scanner:
self.status = ScanStatus.FAILED
self.logger.logger.error(f"Scan failed: {e}")
finally:
with self.processing_lock:
self.currently_processing.clear()
if self._is_stop_requested():
self.status = ScanStatus.STOPPED
elif self.scan_failed_due_to_retries:
self.status = ScanStatus.FAILED
else:
self.status = ScanStatus.COMPLETED
@@ -322,21 +460,21 @@ class Scanner:
self.logger.log_scan_complete()
if self.executor:
self.executor.shutdown(wait=False, cancel_futures=True)
self.executor = None
stats = self.graph.get_statistics()
print("Final scan statistics:")
print(f" - Total nodes: {stats['basic_metrics']['total_nodes']}")
print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
print(f" - Targets processed: {len(processed_targets)}")
print(f" - Tasks processed: {len(processed_tasks)}")
def _query_single_provider_for_target(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]:
if self._is_stop_requested():
print(f"Stop requested before querying {provider.get_name()} for {target}")
return set(), set(), False
def _query_providers_for_target(self, target: str, depth: int, dns_only: bool = False) -> Tuple[Set[str], Set[str]]:
"""Helper method to query providers for a single target."""
is_ip = _is_valid_ip(target)
target_type = NodeType.IP if is_ip else NodeType.DOMAIN
print(f"Querying providers for {target_type.value}: {target} at depth {depth}")
if self._is_stop_requested():
print(f"Stop requested before querying providers for {target}")
return set(), set()
print(f"Querying {provider.get_name()} for {target_type.value}: {target} at depth {depth}")
self.graph.add_node(target, target_type)
self._initialize_provider_states(target)
@@ -344,38 +482,79 @@ class Scanner:
new_targets = set()
large_entity_members = set()
node_attributes = defaultdict(lambda: defaultdict(list))
provider_successful = True
eligible_providers = self._get_eligible_providers(target, is_ip, dns_only)
try:
provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
if provider_results is None:
provider_successful = False
elif not self._is_stop_requested():
discovered, is_large_entity = self._process_provider_results_forensic(
target, provider, provider_results, node_attributes, depth
)
if is_large_entity:
large_entity_members.update(discovered)
else:
new_targets.update(discovered)
else:
print(f"Stop requested after processing results from {provider.get_name()}")
except Exception as e:
provider_successful = False
self._log_provider_error(target, provider.get_name(), str(e))
if not eligible_providers:
self._log_no_eligible_providers(target, is_ip)
return new_targets, large_entity_members
if not self._is_stop_requested():
for node_id, attributes in node_attributes.items():
if self.graph.graph.has_node(node_id):
node_is_ip = _is_valid_ip(node_id)
node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
self.graph.add_node(node_id, node_type_to_add, attributes=attributes)
for provider in eligible_providers:
if self._is_stop_requested():
print(f"Stop requested while querying providers for {target}")
break
return new_targets, large_entity_members, provider_successful
try:
provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
if provider_results and not self._is_stop_requested():
discovered, is_large_entity = self._process_provider_results_forensic(
target, provider, provider_results, node_attributes, depth
)
if is_large_entity:
large_entity_members.update(discovered)
else:
new_targets.update(discovered)
except Exception as e:
self._log_provider_error(target, provider.get_name(), str(e))
def stop_scan(self) -> bool:
"""Request immediate scan termination with proper cleanup."""
try:
print("=== INITIATING IMMEDIATE SCAN TERMINATION ===")
self.logger.logger.info("Scan termination requested by user")
for node_id, attributes in node_attributes.items():
if self.graph.graph.has_node(node_id):
node_is_ip = _is_valid_ip(node_id)
node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
self.graph.add_node(node_id, node_type_to_add, attributes=attributes)
# **IMPROVED**: More aggressive stop signal setting
self._set_stop_signal()
self.status = ScanStatus.STOPPED
return new_targets, large_entity_members
# **NEW**: Clear processing state immediately
with self.processing_lock:
currently_processing_copy = self.currently_processing.copy()
self.currently_processing.clear()
print(f"Cleared {len(currently_processing_copy)} currently processing targets: {currently_processing_copy}")
# **IMPROVED**: Clear task queue and log what was discarded
discarded_tasks = []
while not self.task_queue.empty():
discarded_tasks.append(self.task_queue.get())
self.task_queue = PriorityQueue()
print(f"Discarded {len(discarded_tasks)} pending tasks")
# **IMPROVED**: Aggressively shut down executor
if self.executor:
print("Shutting down executor with immediate cancellation...")
try:
# Cancel all pending futures
self.executor.shutdown(wait=False, cancel_futures=True)
print("Executor shutdown completed")
except Exception as e:
print(f"Error during executor shutdown: {e}")
# Immediately update GUI with stopped status
self._update_session_state()
print("Termination signals sent. The scan will stop as soon as possible.")
return True
except Exception as e:
print(f"ERROR: Exception in stop_scan: {e}")
self.logger.logger.error(f"Error during scan termination: {e}")
traceback.print_exc()
return False
def _update_session_state(self) -> None:
"""
@@ -391,6 +570,50 @@ class Scanner:
except Exception as e:
print(f"ERROR: Failed to update session state: {e}")
def get_scan_status(self) -> Dict[str, Any]:
"""Get current scan status with processing information."""
try:
with self.processing_lock:
currently_processing_count = len(self.currently_processing)
currently_processing_list = list(self.currently_processing)
return {
'status': self.status,
'target_domain': self.current_target,
'current_depth': self.current_depth,
'max_depth': self.max_depth,
'current_indicator': self.current_indicator,
'indicators_processed': self.indicators_processed,
'indicators_completed': self.indicators_completed,
'tasks_re_enqueued': self.tasks_re_enqueued,
'progress_percentage': self._calculate_progress(),
'total_tasks_ever_enqueued': self.total_tasks_ever_enqueued,
'enabled_providers': [provider.get_name() for provider in self.providers],
'graph_statistics': self.graph.get_statistics(),
'task_queue_size': self.task_queue.qsize(),
'currently_processing_count': currently_processing_count,
'currently_processing': currently_processing_list[:5]
}
except Exception as e:
print(f"ERROR: Exception in get_scan_status: {e}")
traceback.print_exc()
return {
'status': 'error',
'target_domain': None,
'current_depth': 0,
'max_depth': 0,
'current_indicator': '',
'indicators_processed': 0,
'indicators_completed': 0,
'tasks_re_enqueued': 0,
'progress_percentage': 0.0,
'enabled_providers': [],
'graph_statistics': {},
'task_queue_size': 0,
'currently_processing_count': 0,
'currently_processing': []
}
def _initialize_provider_states(self, target: str) -> None:
"""Initialize provider states for forensic tracking."""
if not self.graph.graph.has_node(target):
@@ -420,22 +643,25 @@ class Scanner:
return eligible
def _already_queried_provider(self, target: str, provider_name: str) -> bool:
"""Check if we already queried a provider for a target."""
"""Check if we already successfully queried a provider for a target."""
if not self.graph.graph.has_node(target):
return False
node_data = self.graph.graph.nodes[target]
provider_states = node_data.get('metadata', {}).get('provider_states', {})
return provider_name in provider_states
def _query_single_provider_forensic(self, provider, target: str, is_ip: bool, current_depth: int) -> List:
# A provider has been successfully queried if a state exists and its status is 'success'
provider_state = provider_states.get(provider_name)
return provider_state is not None and provider_state.get('status') == 'success'
def _query_single_provider_forensic(self, provider, target: str, is_ip: bool, current_depth: int) -> Optional[List]:
"""Query a single provider with stop signal checking."""
provider_name = provider.get_name()
start_time = datetime.now(timezone.utc)
if self._is_stop_requested():
print(f"Stop requested before querying {provider_name} for {target}")
return []
return None
print(f"Querying {provider_name} for {target}")
@@ -449,7 +675,7 @@ class Scanner:
if self._is_stop_requested():
print(f"Stop requested after querying {provider_name} for {target}")
return []
return None
self._update_provider_state(target, provider_name, 'success', len(results), None, start_time)
@@ -459,10 +685,10 @@ class Scanner:
except Exception as e:
self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
print(f"{provider_name} failed for {target}: {e}")
return []
return None
def _update_provider_state(self, target: str, provider_name: str, status: str,
results_count: int, error: str, start_time: datetime) -> None:
results_count: int, error: Optional[str], start_time: datetime) -> None:
"""Update provider state in node metadata for forensic tracking."""
if not self.graph.graph.has_node(target):
return
@@ -499,7 +725,7 @@ class Scanner:
return members, True
for i, (source, rel_target, rel_type, confidence, raw_data) in enumerate(results):
if i % 10 == 0 and self._is_stop_requested():
if i % 5 == 0 and self._is_stop_requested(): # Check more frequently
print(f"Stop requested while processing results from {provider_name} for {target}")
break
@@ -515,7 +741,22 @@ class Scanner:
self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])
if _is_valid_ip(rel_target):
if isinstance(rel_target, list):
# If the target is a list, iterate and process each item
for single_target in rel_target:
if _is_valid_ip(single_target):
self.graph.add_node(single_target, NodeType.IP)
if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data):
print(f"Added IP relationship: {source} -> {single_target} ({rel_type})")
discovered_targets.add(single_target)
elif _is_valid_domain(single_target):
self.graph.add_node(single_target, NodeType.DOMAIN)
if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data):
print(f"Added domain relationship: {source} -> {single_target} ({rel_type})")
discovered_targets.add(single_target)
self._collect_node_attributes(single_target, provider_name, rel_type, source, raw_data, node_attributes[single_target])
elif _is_valid_ip(rel_target):
self.graph.add_node(rel_target, NodeType.IP)
if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
print(f"Added IP relationship: {source} -> {rel_target} ({rel_type})")
@@ -621,7 +862,6 @@ class Scanner:
if target not in attributes[record_type_name]:
attributes[record_type_name].append(target)
def _log_target_processing_error(self, target: str, error: str) -> None:
"""Log target processing errors for forensic trail."""
self.logger.logger.error(f"Target processing failed for {target}: {error}")
@@ -635,69 +875,11 @@ class Scanner:
target_type = 'IP' if is_ip else 'domain'
self.logger.logger.warning(f"No eligible providers for {target_type}: {target}")
def stop_scan(self) -> bool:
"""Request immediate scan termination with immediate GUI feedback."""
try:
print("=== INITIATING IMMEDIATE SCAN TERMINATION ===")
self.logger.logger.info("Scan termination requested by user")
# Set both local and Redis stop signals
self._set_stop_signal()
self.status = ScanStatus.STOPPED
# Immediately update GUI with stopped status
self._update_session_state()
# Cancel executor futures if running
if self.executor:
print("Shutting down executor with immediate cancellation...")
self.executor.shutdown(wait=False, cancel_futures=True)
print("Termination signals sent. The scan will stop as soon as possible.")
return True
except Exception as e:
print(f"ERROR: Exception in stop_scan: {e}")
self.logger.logger.error(f"Error during scan termination: {e}")
traceback.print_exc()
return False
def get_scan_status(self) -> Dict[str, Any]:
"""Get current scan status with forensic information."""
try:
return {
'status': self.status,
'target_domain': self.current_target,
'current_depth': self.current_depth,
'max_depth': self.max_depth,
'current_indicator': self.current_indicator,
'total_indicators_found': self.total_indicators_found,
'indicators_processed': self.indicators_processed,
'progress_percentage': self._calculate_progress(),
'enabled_providers': [provider.get_name() for provider in self.providers],
'graph_statistics': self.graph.get_statistics()
}
except Exception as e:
print(f"ERROR: Exception in get_scan_status: {e}")
traceback.print_exc()
return {
'status': 'error',
'target_domain': None,
'current_depth': 0,
'max_depth': 0,
'current_indicator': '',
'total_indicators_found': 0,
'indicators_processed': 0,
'progress_percentage': 0.0,
'enabled_providers': [],
'graph_statistics': {}
}
def _calculate_progress(self) -> float:
"""Calculate scan progress percentage."""
if self.total_indicators_found == 0:
"""Calculate scan progress percentage based on task completion."""
if self.total_tasks_ever_enqueued == 0:
return 0.0
return min(100.0, (self.indicators_processed / self.total_indicators_found) * 100)
return min(100.0, (self.indicators_completed / self.total_tasks_ever_enqueued) * 100)
def get_graph_data(self) -> Dict[str, Any]:
"""Get current graph data for visualization."""
@@ -748,7 +930,7 @@ class Scanner:
if isinstance(attribute, type) and issubclass(attribute, BaseProvider) and attribute is not BaseProvider:
provider_class = attribute
# Instantiate to get metadata, even if not fully configured
temp_provider = provider_class(session_config=self.config)
temp_provider = provider_class(name=attribute_name, session_config=self.config)
provider_name = temp_provider.get_name()
# Find the actual provider instance if it exists, to get live stats

View File

@@ -3,11 +3,9 @@ Per-session configuration management for DNSRecon.
Provides isolated configuration instances for each user session.
"""
import os
from typing import Dict, Optional
from config import Config
class SessionConfig:
class SessionConfig(Config):
"""
Session-specific configuration that inherits from global config
but maintains isolated API keys and provider settings.
@@ -15,106 +13,8 @@ class SessionConfig:
def __init__(self):
"""Initialize session config with global defaults."""
# Copy all attributes from global config
self.api_keys: Dict[str, Optional[str]] = {
'shodan': None
}
super().__init__()
# Default settings (copied from global config)
self.default_recursion_depth = 2
self.default_timeout = 30
self.max_concurrent_requests = 5
self.large_entity_threshold = 100
# Rate limiting settings (per session)
self.rate_limits = {
'crtsh': 60,
'shodan': 60,
'dns': 100
}
# Provider settings (per session)
self.enabled_providers = {
'crtsh': True,
'dns': True,
'shodan': False
}
# Logging configuration
self.log_level = 'INFO'
self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# Flask configuration (shared)
self.flask_host = '127.0.0.1'
self.flask_port = 5000
self.flask_debug = True
def set_api_key(self, provider: str, api_key: str) -> bool:
"""
Set API key for a provider in this session.
Args:
provider: Provider name (shodan, etc)
api_key: API key string
Returns:
bool: True if key was set successfully
"""
if provider in self.api_keys:
self.api_keys[provider] = api_key
self.enabled_providers[provider] = True if api_key else False
return True
return False
def get_api_key(self, provider: str) -> Optional[str]:
"""
Get API key for a provider in this session.
Args:
provider: Provider name
Returns:
API key or None if not set
"""
return self.api_keys.get(provider)
def is_provider_enabled(self, provider: str) -> bool:
"""
Check if a provider is enabled in this session.
Args:
provider: Provider name
Returns:
bool: True if provider is enabled
"""
return self.enabled_providers.get(provider, False)
def get_rate_limit(self, provider: str) -> int:
"""
Get rate limit for a provider in this session.
Args:
provider: Provider name
Returns:
Rate limit in requests per minute
"""
return self.rate_limits.get(provider, 60)
def load_from_env(self):
"""Load configuration from environment variables (only if not already set)."""
if os.getenv('SHODAN_API_KEY') and not self.api_keys['shodan']:
self.set_api_key('shodan', os.getenv('SHODAN_API_KEY'))
# Override default settings from environment
self.default_recursion_depth = int(os.getenv('DEFAULT_RECURSION_DEPTH', '2'))
self.default_timeout = 30
self.max_concurrent_requests = 5
def create_session_config() -> SessionConfig:
def create_session_config() -> 'SessionConfig':
"""Create a new session configuration instance."""
session_config = SessionConfig()
session_config.load_from_env()
return session_config
return SessionConfig()

View File

@@ -8,6 +8,7 @@ import pickle
from typing import Dict, Optional, Any, List
from core.scanner import Scanner
from config import config
# WARNING: Using pickle can be a security risk if the data source is not trusted.
# In this case, we are only serializing/deserializing our own trusted Scanner objects,
@@ -18,10 +19,13 @@ class SessionManager:
Manages multiple scanner instances for concurrent user sessions using Redis.
"""
def __init__(self, session_timeout_minutes: int = 60):
def __init__(self, session_timeout_minutes: int = 0):
"""
Initialize session manager with a Redis backend.
"""
if session_timeout_minutes is None:
session_timeout_minutes = config.session_timeout_minutes
self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
self.session_timeout = session_timeout_minutes * 60 # Convert to seconds
self.lock = threading.Lock() # Lock for local operations, Redis handles atomic ops
@@ -355,31 +359,6 @@ class SessionManager:
time.sleep(300) # Sleep for 5 minutes
def list_active_sessions(self) -> List[Dict[str, Any]]:
"""List all active sessions for admin purposes."""
try:
session_keys = self.redis_client.keys("dnsrecon:session:*")
sessions = []
for session_key in session_keys:
session_id = session_key.decode('utf-8').split(':')[-1]
session_data = self._get_session_data(session_id)
if session_data:
scanner = session_data.get('scanner')
sessions.append({
'session_id': session_id,
'created_at': session_data.get('created_at'),
'last_activity': session_data.get('last_activity'),
'scanner_status': scanner.status if scanner else 'unknown',
'current_target': scanner.current_target if scanner else None
})
return sessions
except Exception as e:
print(f"ERROR: Failed to list active sessions: {e}")
return []
def get_statistics(self) -> Dict[str, Any]:
"""Get session manager statistics."""
try:

BIN
dump.rdb

Binary file not shown.

View File

@@ -3,14 +3,15 @@ Data provider modules for DNSRecon.
Contains implementations for various reconnaissance data sources.
"""
from .base_provider import BaseProvider, RateLimiter
from .base_provider import BaseProvider
from .crtsh_provider import CrtShProvider
from .dns_provider import DNSProvider
from .shodan_provider import ShodanProvider
from core.rate_limiter import GlobalRateLimiter
__all__ = [
'BaseProvider',
'RateLimiter',
'GlobalRateLimiter',
'CrtShProvider',
'DNSProvider',
'ShodanProvider'

View File

@@ -3,46 +3,12 @@
import time
import requests
import threading
import os
import json
import redis
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Tuple
from core.logger import get_forensic_logger
class RateLimiter:
"""Simple rate limiter for API calls."""
def __init__(self, requests_per_minute: int):
"""
Initialize rate limiter.
Args:
requests_per_minute: Maximum requests allowed per minute
"""
self.requests_per_minute = requests_per_minute
self.min_interval = 60.0 / requests_per_minute
self.last_request_time = 0
def __getstate__(self):
"""RateLimiter is fully picklable, return full state."""
return self.__dict__.copy()
def __setstate__(self, state):
"""Restore RateLimiter state."""
self.__dict__.update(state)
def wait_if_needed(self) -> None:
"""Wait if necessary to respect rate limits."""
current_time = time.time()
time_since_last = current_time - self.last_request_time
if time_since_last < self.min_interval:
sleep_time = self.min_interval - time_since_last
time.sleep(sleep_time)
self.last_request_time = time.time()
from core.rate_limiter import GlobalRateLimiter
class BaseProvider(ABC):
@@ -74,26 +40,17 @@ class BaseProvider(ABC):
actual_timeout = timeout
self.name = name
self.rate_limiter = RateLimiter(actual_rate_limit)
self.timeout = actual_timeout
self._local = threading.local()
self.logger = get_forensic_logger()
self._stop_event = None
# Caching configuration (per session)
self.cache_dir = f'.cache/{id(self.config)}' # Unique cache per session config
self.cache_expiry = 12 * 3600 # 12 hours in seconds
if not os.path.exists(self.cache_dir):
os.makedirs(self.cache_dir)
# Statistics (per provider instance)
self.total_requests = 0
self.successful_requests = 0
self.failed_requests = 0
self.total_relationships_found = 0
print(f"Initialized {name} provider with session-specific config (rate: {actual_rate_limit}/min)")
def __getstate__(self):
"""Prepare BaseProvider for pickling by excluding unpicklable objects."""
state = self.__dict__.copy()
@@ -174,171 +131,77 @@ class BaseProvider(ABC):
def make_request(self, url: str, method: str = "GET",
params: Optional[Dict[str, Any]] = None,
headers: Optional[Dict[str, str]] = None,
target_indicator: str = "",
max_retries: int = 3) -> Optional[requests.Response]:
target_indicator: str = "") -> Optional[requests.Response]:
"""
Make a rate-limited HTTP request with aggressive stop signal handling.
Terminates immediately when stop is requested, including during retries.
Make a rate-limited HTTP request.
"""
# Check for cancellation before starting
if self._is_stop_requested():
print(f"Request cancelled before start: {url}")
return None
# Create a unique cache key
cache_key = f"{self.name}_{hash(f'{method}:{url}:{json.dumps(params, sort_keys=True)}')}.json"
cache_path = os.path.join(self.cache_dir, cache_key)
start_time = time.time()
response = None
error = None
# Check cache
if os.path.exists(cache_path):
cache_age = time.time() - os.path.getmtime(cache_path)
if cache_age < self.cache_expiry:
print(f"Returning cached response for: {url}")
with open(cache_path, 'r') as f:
cached_data = json.load(f)
response = requests.Response()
response.status_code = cached_data['status_code']
response._content = cached_data['content'].encode('utf-8')
response.headers = cached_data['headers']
return response
try:
self.total_requests += 1
# Determine effective max_retries based on stop signal
effective_max_retries = 0 if self._is_stop_requested() else max_retries
last_exception = None
request_headers = dict(self.session.headers).copy()
if headers:
request_headers.update(headers)
for attempt in range(effective_max_retries + 1):
# AGGRESSIVE: Check for cancellation before each attempt
if self._is_stop_requested():
print(f"Request cancelled during attempt {attempt + 1}: {url}")
return None
print(f"Making {method} request to: {url}")
# Apply rate limiting with cancellation awareness
if not self._wait_with_cancellation_check():
print(f"Request cancelled during rate limiting: {url}")
return None
# AGGRESSIVE: Final check before making HTTP request
if self._is_stop_requested():
print(f"Request cancelled before HTTP call: {url}")
return None
start_time = time.time()
response = None
error = None
try:
self.total_requests += 1
# Prepare request
request_headers = self.session.headers.copy()
if headers:
request_headers.update(headers)
print(f"Making {method} request to: {url} (attempt {attempt + 1})")
# AGGRESSIVE: Use much shorter timeout if termination is requested
request_timeout = self.timeout
if self._is_stop_requested():
request_timeout = 2 # Max 2 seconds if termination requested
print(f"Stop requested - using short timeout: {request_timeout}s")
# Make request
if method.upper() == "GET":
response = self.session.get(
url,
params=params,
headers=request_headers,
timeout=request_timeout
)
elif method.upper() == "POST":
response = self.session.post(
url,
json=params,
headers=request_headers,
timeout=request_timeout
)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
print(f"Response status: {response.status_code}")
response.raise_for_status()
self.successful_requests += 1
# Success - log, cache, and return
duration_ms = (time.time() - start_time) * 1000
self.logger.log_api_request(
provider=self.name,
url=url,
method=method.upper(),
status_code=response.status_code,
response_size=len(response.content),
duration_ms=duration_ms,
error=None,
target_indicator=target_indicator
if method.upper() == "GET":
response = self.session.get(
url,
params=params,
headers=request_headers,
timeout=self.timeout
)
# Cache the successful response to disk
with open(cache_path, 'w') as f:
json.dump({
'status_code': response.status_code,
'content': response.text,
'headers': dict(response.headers)
}, f)
return response
elif method.upper() == "POST":
response = self.session.post(
url,
json=params,
headers=request_headers,
timeout=self.timeout
)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
except requests.exceptions.RequestException as e:
error = str(e)
self.failed_requests += 1
print(f"Request failed (attempt {attempt + 1}): {error}")
last_exception = e
print(f"Response status: {response.status_code}")
response.raise_for_status()
self.successful_requests += 1
# AGGRESSIVE: Immediately abort retries if stop requested
if self._is_stop_requested():
print(f"Stop requested - aborting retries for: {url}")
break
duration_ms = (time.time() - start_time) * 1000
self.logger.log_api_request(
provider=self.name,
url=url,
method=method.upper(),
status_code=response.status_code,
response_size=len(response.content),
duration_ms=duration_ms,
error=None,
target_indicator=target_indicator
)
# Check if we should retry (but only if stop not requested)
if attempt < effective_max_retries and self._should_retry(e):
# Use a longer, more respectful backoff for 429 errors
if isinstance(e, requests.exceptions.HTTPError) and e.response and e.response.status_code == 429:
# Start with a 10-second backoff and increase exponentially
backoff_time = 10 * (2 ** attempt)
print(f"Rate limit hit. Retrying in {backoff_time} seconds...")
else:
backoff_time = min(1.0, (2 ** attempt) * 0.5) # Shorter backoff for other errors
print(f"Retrying in {backoff_time} seconds...")
return response
# AGGRESSIVE: Much shorter backoff and more frequent checking
if not self._sleep_with_cancellation_check(backoff_time):
print(f"Stop requested during backoff - aborting: {url}")
return None
continue
else:
break
except Exception as e:
error = f"Unexpected error: {str(e)}"
self.failed_requests += 1
print(f"Unexpected error: {error}")
last_exception = e
break
# All attempts failed - log and return None
duration_ms = (time.time() - start_time) * 1000
self.logger.log_api_request(
provider=self.name,
url=url,
method=method.upper(),
status_code=response.status_code if response else None,
response_size=len(response.content) if response else None,
duration_ms=duration_ms,
error=error,
target_indicator=target_indicator
)
if error and last_exception:
raise last_exception
return None
except requests.exceptions.RequestException as e:
error = str(e)
self.failed_requests += 1
duration_ms = (time.time() - start_time) * 1000
self.logger.log_api_request(
provider=self.name,
url=url,
method=method.upper(),
status_code=response.status_code if response else None,
response_size=len(response.content) if response else None,
duration_ms=duration_ms,
error=error,
target_indicator=target_indicator
)
raise e
def _is_stop_requested(self) -> bool:
"""
@@ -348,44 +211,6 @@ class BaseProvider(ABC):
return True
return False
def _wait_with_cancellation_check(self) -> bool:
"""
Wait for rate limiting while aggressively checking for cancellation.
Returns False if cancelled during wait.
"""
current_time = time.time()
time_since_last = current_time - self.rate_limiter.last_request_time
if time_since_last < self.rate_limiter.min_interval:
sleep_time = self.rate_limiter.min_interval - time_since_last
if not self._sleep_with_cancellation_check(sleep_time):
return False
self.rate_limiter.last_request_time = time.time()
return True
def _sleep_with_cancellation_check(self, sleep_time: float) -> bool:
"""
Sleep for the specified time while aggressively checking for cancellation.
Args:
sleep_time: Time to sleep in seconds
Returns:
bool: True if sleep completed, False if cancelled
"""
sleep_start = time.time()
check_interval = 0.05 # Check every 50ms for aggressive responsiveness
while time.time() - sleep_start < sleep_time:
if self._is_stop_requested():
return False
remaining_time = sleep_time - (time.time() - sleep_start)
time.sleep(min(check_interval, remaining_time))
return True
def set_stop_event(self, stop_event: threading.Event) -> None:
"""
Set the stop event for this provider to enable cancellation.
@@ -395,28 +220,6 @@ class BaseProvider(ABC):
"""
self._stop_event = stop_event
def _should_retry(self, exception: requests.exceptions.RequestException) -> bool:
"""
Determine if a request should be retried based on the exception.
Args:
exception: The request exception that occurred
Returns:
True if the request should be retried
"""
# Retry on connection errors and timeouts
if isinstance(exception, (requests.exceptions.ConnectionError,
requests.exceptions.Timeout)):
return True
if isinstance(exception, requests.exceptions.HTTPError):
if hasattr(exception, 'response') and exception.response:
# Retry on server errors (5xx) AND on rate-limiting errors (429)
return exception.response.status_code >= 500 or exception.response.status_code == 429
return False
def log_relationship_discovery(self, source_node: str, target_node: str,
relationship_type: str,
confidence_score: float,
@@ -459,5 +262,5 @@ class BaseProvider(ABC):
'failed_requests': self.failed_requests,
'success_rate': (self.successful_requests / self.total_requests * 100) if self.total_requests > 0 else 0,
'relationships_found': self.total_relationships_found,
'rate_limit': self.rate_limiter.requests_per_minute
'rate_limit': self.config.get_rate_limit(self.name)
}

View File

@@ -1,11 +1,9 @@
"""
Certificate Transparency provider using crt.sh.
Discovers domain relationships through certificate SAN analysis with comprehensive certificate tracking.
Stores certificates as metadata on domain nodes rather than creating certificate nodes.
"""
# dnsrecon/providers/crtsh_provider.py
import json
import re
import os
from pathlib import Path
from typing import List, Dict, Any, Tuple, Set
from urllib.parse import quote
from datetime import datetime, timezone
@@ -18,10 +16,10 @@ from utils.helpers import _is_valid_domain
class CrtShProvider(BaseProvider):
"""
Provider for querying crt.sh certificate transparency database.
Now uses session-specific configuration and caching.
Now uses session-specific configuration and caching with accumulative behavior.
"""
def __init__(self, session_config=None):
def __init__(self, name=None, session_config=None):
"""Initialize CrtSh provider with session-specific configuration."""
super().__init__(
name="crtsh",
@@ -32,6 +30,10 @@ class CrtShProvider(BaseProvider):
self.base_url = "https://crt.sh/"
self._stop_event = None
# Initialize cache directory
self.cache_dir = Path('cache') / 'crtsh'
self.cache_dir.mkdir(parents=True, exist_ok=True)
def get_name(self) -> str:
"""Return the provider name."""
return "crtsh"
@@ -56,6 +58,158 @@ class CrtShProvider(BaseProvider):
"""
return True
def _get_cache_file_path(self, domain: str) -> Path:
"""Generate cache file path for a domain."""
# Sanitize domain for filename safety
safe_domain = domain.replace('.', '_').replace('/', '_').replace('\\', '_')
return self.cache_dir / f"{safe_domain}.json"
def _get_cache_status(self, cache_file_path: Path) -> str:
"""
Check cache status for a domain.
Returns: 'not_found', 'fresh', or 'stale'
"""
if not cache_file_path.exists():
return "not_found"
try:
with open(cache_file_path, 'r') as f:
cache_data = json.load(f)
last_query_str = cache_data.get("last_upstream_query")
if not last_query_str:
return "stale" # Invalid cache format
last_query = datetime.fromisoformat(last_query_str.replace('Z', '+00:00'))
hours_since_query = (datetime.now(timezone.utc) - last_query).total_seconds() / 3600
cache_timeout = self.config.cache_timeout_hours
if hours_since_query < cache_timeout:
return "fresh"
else:
return "stale"
except (json.JSONDecodeError, ValueError, KeyError) as e:
self.logger.logger.warning(f"Invalid cache file format for {cache_file_path}: {e}")
return "stale"
def _load_cached_certificates(self, cache_file_path: Path) -> List[Dict[str, Any]]:
"""Load certificates from cache file."""
try:
with open(cache_file_path, 'r') as f:
cache_data = json.load(f)
return cache_data.get('certificates', [])
except (json.JSONDecodeError, FileNotFoundError, KeyError) as e:
self.logger.logger.error(f"Failed to load cached certificates from {cache_file_path}: {e}")
return []
def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]:
"""
Query crt.sh API for raw certificate data.
Raises exceptions for network errors to allow core logic to retry.
"""
url = f"{self.base_url}?q={quote(domain)}&output=json"
response = self.make_request(url, target_indicator=domain)
if not response or response.status_code != 200:
# This could be a temporary error - raise exception so core can retry
raise requests.exceptions.RequestException(f"crt.sh API returned status {response.status_code if response else 'None'}")
certificates = response.json()
if not certificates:
return []
return certificates
def _create_cache_file(self, cache_file_path: Path, domain: str, certificates: List[Dict[str, Any]]) -> None:
"""Create new cache file with certificates."""
try:
cache_data = {
"domain": domain,
"first_cached": datetime.now(timezone.utc).isoformat(),
"last_upstream_query": datetime.now(timezone.utc).isoformat(),
"upstream_query_count": 1,
"certificates": certificates
}
cache_file_path.parent.mkdir(parents=True, exist_ok=True)
with open(cache_file_path, 'w') as f:
json.dump(cache_data, f, separators=(',', ':'))
self.logger.logger.info(f"Created cache file for {domain} with {len(certificates)} certificates")
except Exception as e:
self.logger.logger.warning(f"Failed to create cache file for {domain}: {e}")
def _append_to_cache(self, cache_file_path: Path, new_certificates: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Append new certificates to existing cache and return all certificates."""
try:
# Load existing cache
with open(cache_file_path, 'r') as f:
cache_data = json.load(f)
# Track existing certificate IDs to avoid duplicates
existing_ids = {cert.get('id') for cert in cache_data.get('certificates', [])}
# Add only new certificates
added_count = 0
for cert in new_certificates:
cert_id = cert.get('id')
if cert_id and cert_id not in existing_ids:
cache_data['certificates'].append(cert)
existing_ids.add(cert_id)
added_count += 1
# Update metadata
cache_data['last_upstream_query'] = datetime.now(timezone.utc).isoformat()
cache_data['upstream_query_count'] = cache_data.get('upstream_query_count', 0) + 1
# Write updated cache
with open(cache_file_path, 'w') as f:
json.dump(cache_data, f, separators=(',', ':'))
total_certs = len(cache_data['certificates'])
self.logger.logger.info(f"Appended {added_count} new certificates to cache. Total: {total_certs}")
return cache_data['certificates']
except Exception as e:
self.logger.logger.warning(f"Failed to append to cache: {e}")
return new_certificates # Fallback to new certificates only
def _parse_issuer_organization(self, issuer_dn: str) -> str:
"""
Parse the issuer Distinguished Name to extract just the organization name.
Args:
issuer_dn: Full issuer DN string (e.g., "C=US, O=Let's Encrypt, CN=R11")
Returns:
Organization name (e.g., "Let's Encrypt") or original string if parsing fails
"""
if not issuer_dn:
return issuer_dn
try:
# Split by comma and look for O= component
components = [comp.strip() for comp in issuer_dn.split(',')]
for component in components:
if component.startswith('O='):
# Extract the value after O=
org_name = component[2:].strip()
# Remove quotes if present
if org_name.startswith('"') and org_name.endswith('"'):
org_name = org_name[1:-1]
return org_name
# If no O= component found, return the original string
return issuer_dn
except Exception as e:
self.logger.logger.debug(f"Failed to parse issuer DN '{issuer_dn}': {e}")
return issuer_dn
def _parse_certificate_date(self, date_string: str) -> datetime:
"""
Parse certificate date from crt.sh format.
@@ -133,10 +287,15 @@ class CrtShProvider(BaseProvider):
Returns:
Comprehensive certificate metadata dictionary
"""
# Parse the issuer name to get just the organization
raw_issuer_name = cert_data.get('issuer_name', '')
parsed_issuer_name = self._parse_issuer_organization(raw_issuer_name)
metadata = {
'certificate_id': cert_data.get('id'),
'serial_number': cert_data.get('serial_number'),
'issuer_name': cert_data.get('issuer_name'),
'issuer_name': parsed_issuer_name, # Use parsed organization name
#'issuer_name_full': raw_issuer_name, # deliberately left out, because its not useful in most cases
'issuer_ca_id': cert_data.get('issuer_ca_id'),
'common_name': cert_data.get('common_name'),
'not_before': cert_data.get('not_before'),
@@ -167,7 +326,8 @@ class CrtShProvider(BaseProvider):
def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Query crt.sh for certificates containing the domain.
Query crt.sh for certificates containing the domain with caching support.
Properly raises exceptions for network errors to allow core logic retries.
"""
if not _is_valid_domain(domain):
return []
@@ -177,130 +337,166 @@ class CrtShProvider(BaseProvider):
print(f"CrtSh query cancelled before start for domain: {domain}")
return []
relationships = []
# === CACHING LOGIC ===
cache_file = self._get_cache_file_path(domain)
cache_status = self._get_cache_status(cache_file)
certificates = []
try:
# Query crt.sh for certificates
url = f"{self.base_url}?q={quote(domain)}&output=json"
response = self.make_request(url, target_indicator=domain, max_retries=3)
if cache_status == "fresh":
# Use cached data
certificates = self._load_cached_certificates(cache_file)
self.logger.logger.info(f"Using cached data for {domain} ({len(certificates)} certificates)")
if not response or response.status_code != 200:
return []
elif cache_status == "not_found":
# Fresh query, create new cache
certificates = self._query_crtsh_api(domain)
if certificates: # Only cache if we got results
self._create_cache_file(cache_file, domain, certificates)
self.logger.logger.info(f"Cached fresh data for {domain} ({len(certificates)} certificates)")
else:
self.logger.logger.info(f"No certificates found for {domain}, not caching")
# Check for cancellation after request
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled after request for domain: {domain}")
return []
elif cache_status == "stale":
# Append query, update existing cache
try:
new_certificates = self._query_crtsh_api(domain)
if new_certificates:
certificates = self._append_to_cache(cache_file, new_certificates)
self.logger.logger.info(f"Refreshed and appended cache for {domain}")
else:
# Use existing cache if API returns no results
certificates = self._load_cached_certificates(cache_file)
self.logger.logger.info(f"API returned no new results, using existing cache for {domain}")
except requests.exceptions.RequestException:
# If API call fails for stale cache, use cached data and re-raise for retry logic
certificates = self._load_cached_certificates(cache_file)
if certificates:
self.logger.logger.warning(f"API call failed for {domain}, using stale cache data ({len(certificates)} certificates)")
# Don't re-raise here, just use cached data
else:
# No cached data and API failed - re-raise for retry
raise
certificates = response.json()
except requests.exceptions.RequestException as e:
# Network/API errors should be re-raised so core logic can retry
self.logger.logger.error(f"API query failed for {domain}: {e}")
raise e
except json.JSONDecodeError as e:
# JSON parsing errors should also be raised for retry
self.logger.logger.error(f"Failed to parse JSON response from crt.sh for {domain}: {e}")
raise e
if not certificates:
return []
# Check for cancellation after cache operations
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled after cache operations for domain: {domain}")
return []
# Check for cancellation before processing
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled before processing for domain: {domain}")
return []
if not certificates:
return []
# Aggregate certificate data by domain
domain_certificates = {}
all_discovered_domains = set()
return self._process_certificates_to_relationships(domain, certificates)
# Process certificates with cancellation checking
for i, cert_data in enumerate(certificates):
# Check for cancellation every 5 certificates instead of 10 for faster response
if i % 5 == 0 and self._stop_event and self._stop_event.is_set():
print(f"CrtSh processing cancelled at certificate {i} for domain: {domain}")
break
def _process_certificates_to_relationships(self, domain: str, certificates: List[Dict[str, Any]]) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Process certificates to relationships using existing logic.
This method contains the original processing logic from query_domain.
"""
relationships = []
cert_metadata = self._extract_certificate_metadata(cert_data)
cert_domains = self._extract_domains_from_certificate(cert_data)
# Check for cancellation before processing
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh processing cancelled before processing for domain: {domain}")
return []
# Add all domains from this certificate to our tracking
for cert_domain in cert_domains:
# Additional stop check during domain processing
if i % 20 == 0 and self._stop_event and self._stop_event.is_set():
print(f"CrtSh domain processing cancelled for domain: {domain}")
break
# Aggregate certificate data by domain
domain_certificates = {}
all_discovered_domains = set()
if not _is_valid_domain(cert_domain):
continue
# Process certificates with cancellation checking
for i, cert_data in enumerate(certificates):
# Check for cancellation every 5 certificates for faster response
if i % 5 == 0 and self._stop_event and self._stop_event.is_set():
print(f"CrtSh processing cancelled at certificate {i} for domain: {domain}")
break
all_discovered_domains.add(cert_domain)
cert_metadata = self._extract_certificate_metadata(cert_data)
cert_domains = self._extract_domains_from_certificate(cert_data)
# Initialize domain certificate list if needed
if cert_domain not in domain_certificates:
domain_certificates[cert_domain] = []
# Add this certificate to the domain's certificate list
domain_certificates[cert_domain].append(cert_metadata)
# Final cancellation check before creating relationships
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled before relationship creation for domain: {domain}")
return []
# Create relationships from query domain to ALL discovered domains with stop checking
for i, discovered_domain in enumerate(all_discovered_domains):
if discovered_domain == domain:
continue # Skip self-relationships
# Check for cancellation every 10 relationships
if i % 10 == 0 and self._stop_event and self._stop_event.is_set():
print(f"CrtSh relationship creation cancelled for domain: {domain}")
break
if not _is_valid_domain(discovered_domain):
# Add all domains from this certificate to our tracking
all_discovered_domains.update(cert_domains)
for cert_domain in cert_domains:
if not _is_valid_domain(cert_domain):
continue
# Get certificates for both domains
query_domain_certs = domain_certificates.get(domain, [])
discovered_domain_certs = domain_certificates.get(discovered_domain, [])
# Initialize domain certificate list if needed
if cert_domain not in domain_certificates:
domain_certificates[cert_domain] = []
# Find shared certificates (for metadata purposes)
shared_certificates = self._find_shared_certificates(query_domain_certs, discovered_domain_certs)
# Add this certificate to the domain's certificate list
domain_certificates[cert_domain].append(cert_metadata)
# Calculate confidence based on relationship type and shared certificates
confidence = self._calculate_domain_relationship_confidence(
domain, discovered_domain, shared_certificates, all_discovered_domains
)
# Final cancellation check before creating relationships
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled before relationship creation for domain: {domain}")
return []
# Create comprehensive raw data for the relationship
relationship_raw_data = {
'relationship_type': 'certificate_discovery',
'shared_certificates': shared_certificates,
'total_shared_certs': len(shared_certificates),
'discovery_context': self._determine_relationship_context(discovered_domain, domain),
'domain_certificates': {
domain: self._summarize_certificates(query_domain_certs),
discovered_domain: self._summarize_certificates(discovered_domain_certs)
}
# Create relationships from query domain to ALL discovered domains with stop checking
for i, discovered_domain in enumerate(all_discovered_domains):
if discovered_domain == domain:
continue # Skip self-relationships
# Check for cancellation every 10 relationships
if i % 10 == 0 and self._stop_event and self._stop_event.is_set():
print(f"CrtSh relationship creation cancelled for domain: {domain}")
break
if not _is_valid_domain(discovered_domain):
continue
# Get certificates for both domains
query_domain_certs = domain_certificates.get(domain, [])
discovered_domain_certs = domain_certificates.get(discovered_domain, [])
# Find shared certificates (for metadata purposes)
shared_certificates = self._find_shared_certificates(query_domain_certs, discovered_domain_certs)
# Calculate confidence based on relationship type and shared certificates
confidence = self._calculate_domain_relationship_confidence(
domain, discovered_domain, shared_certificates, all_discovered_domains
)
# Create comprehensive raw data for the relationship
relationship_raw_data = {
'relationship_type': 'certificate_discovery',
'shared_certificates': shared_certificates,
'total_shared_certs': len(shared_certificates),
'discovery_context': self._determine_relationship_context(discovered_domain, domain),
'domain_certificates': {
domain: self._summarize_certificates(query_domain_certs),
discovered_domain: self._summarize_certificates(discovered_domain_certs)
}
}
# Create domain -> domain relationship
relationships.append((
domain,
discovered_domain,
'san_certificate',
confidence,
relationship_raw_data
))
# Log the relationship discovery
self.log_relationship_discovery(
source_node=domain,
target_node=discovered_domain,
relationship_type='san_certificate',
confidence_score=confidence,
raw_data=relationship_raw_data,
discovery_method="certificate_transparency_analysis"
)
except json.JSONDecodeError as e:
self.logger.logger.error(f"Failed to parse JSON response from crt.sh: {e}")
except requests.exceptions.RequestException as e:
self.logger.logger.error(f"HTTP request to crt.sh failed: {e}")
# Create domain -> domain relationship
relationships.append((
domain,
discovered_domain,
'san_certificate',
confidence,
relationship_raw_data
))
# Log the relationship discovery
self.log_relationship_discovery(
source_node=domain,
target_node=discovered_domain,
relationship_type='san_certificate',
confidence_score=confidence,
raw_data=relationship_raw_data,
discovery_method="certificate_transparency_analysis"
)
return relationships
@@ -345,14 +541,15 @@ class CrtShProvider(BaseProvider):
'expires_soon_count': 0,
'unique_issuers': [],
'latest_certificate': None,
'has_valid_cert': False
'has_valid_cert': False,
'certificate_details': [] # Always include empty list
}
valid_count = sum(1 for cert in certificates if cert.get('is_currently_valid'))
expired_count = len(certificates) - valid_count
expires_soon_count = sum(1 for cert in certificates if cert.get('expires_soon'))
# Get unique issuers
# Get unique issuers (using parsed organization names)
unique_issuers = list(set(cert.get('issuer_name') for cert in certificates if cert.get('issuer_name')))
# Find the most recent certificate
@@ -369,6 +566,13 @@ class CrtShProvider(BaseProvider):
except Exception:
continue
# Sort certificates by date for better display (newest first)
sorted_certificates = sorted(
certificates,
key=lambda c: self._get_certificate_sort_date(c),
reverse=True
)
return {
'total_certificates': len(certificates),
'valid_certificates': valid_count,
@@ -377,9 +581,35 @@ class CrtShProvider(BaseProvider):
'unique_issuers': unique_issuers,
'latest_certificate': latest_cert,
'has_valid_cert': valid_count > 0,
'certificate_details': certificates # Full details for forensic analysis
'certificate_details': sorted_certificates # Include full certificate details
}
def _get_certificate_sort_date(self, cert: Dict[str, Any]) -> datetime:
"""
Get a sortable date from certificate data for chronological ordering.
Args:
cert: Certificate metadata dictionary
Returns:
Datetime object for sorting (falls back to epoch if parsing fails)
"""
try:
# Try not_before first (issue date)
if cert.get('not_before'):
return self._parse_certificate_date(cert['not_before'])
# Fall back to entry_timestamp if available
if cert.get('entry_timestamp'):
return self._parse_certificate_date(cert['entry_timestamp'])
# Last resort - return a very old date for certificates without dates
return datetime(1970, 1, 1, tzinfo=timezone.utc)
except Exception:
# If all parsing fails, return epoch
return datetime(1970, 1, 1, tzinfo=timezone.utc)
def _calculate_domain_relationship_confidence(self, domain1: str, domain2: str,
shared_certificates: List[Dict[str, Any]],
all_discovered_domains: Set[str]) -> float:

View File

@@ -1,7 +1,6 @@
# dnsrecon/providers/dns_provider.py
import dns.resolver
import dns.reversename
from dns import resolver, reversename
from typing import List, Dict, Any, Tuple
from .base_provider import BaseProvider
from utils.helpers import _is_valid_ip, _is_valid_domain
@@ -13,7 +12,7 @@ class DNSProvider(BaseProvider):
Now uses session-specific configuration.
"""
def __init__(self, session_config=None):
def __init__(self, name=None, session_config=None):
"""Initialize DNS provider with session-specific configuration."""
super().__init__(
name="dns",
@@ -23,7 +22,7 @@ class DNSProvider(BaseProvider):
)
# Configure DNS resolver
self.resolver = dns.resolver.Resolver()
self.resolver = resolver.Resolver()
self.resolver.timeout = 5
self.resolver.lifetime = 10
#self.resolver.nameservers = ['127.0.0.1']
@@ -51,12 +50,7 @@ class DNSProvider(BaseProvider):
def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Query DNS records for the domain to discover relationships.
Args:
domain: Domain to investigate
Returns:
List of relationships discovered from DNS analysis
...
"""
if not _is_valid_domain(domain):
return []
@@ -65,7 +59,15 @@ class DNSProvider(BaseProvider):
# Query all record types
for record_type in ['A', 'AAAA', 'CNAME', 'MX', 'NS', 'SOA', 'TXT', 'SRV', 'CAA']:
relationships.extend(self._query_record(domain, record_type))
try:
relationships.extend(self._query_record(domain, record_type))
except resolver.NoAnswer:
# This is not an error, just a confirmation that the record doesn't exist.
self.logger.logger.debug(f"No {record_type} record found for {domain}")
except Exception as e:
self.failed_requests += 1
self.logger.logger.debug(f"{record_type} record query failed for {domain}: {e}")
# Optionally, you might want to re-raise other, more serious exceptions.
return relationships
@@ -87,7 +89,7 @@ class DNSProvider(BaseProvider):
try:
# Perform reverse DNS lookup
self.total_requests += 1
reverse_name = dns.reversename.from_address(ip)
reverse_name = reversename.from_address(ip)
response = self.resolver.resolve(reverse_name, 'PTR')
self.successful_requests += 1
@@ -119,9 +121,14 @@ class DNSProvider(BaseProvider):
discovery_method="reverse_dns_lookup"
)
except resolver.NXDOMAIN:
self.failed_requests += 1
self.logger.logger.debug(f"Reverse DNS lookup failed for {ip}: NXDOMAIN")
except Exception as e:
self.failed_requests += 1
self.logger.logger.debug(f"Reverse DNS lookup failed for {ip}: {e}")
# Re-raise the exception so the scanner can handle the failure
raise e
return relationships
@@ -185,5 +192,7 @@ class DNSProvider(BaseProvider):
except Exception as e:
self.failed_requests += 1
self.logger.logger.debug(f"{record_type} record query failed for {domain}: {e}")
# Re-raise the exception so the scanner can handle it
raise e
return relationships

View File

@@ -1,7 +1,4 @@
"""
Shodan provider for DNSRecon.
Discovers IP relationships and infrastructure context through Shodan API.
"""
# dnsrecon/providers/shodan_provider.py
import json
from typing import List, Dict, Any, Tuple
@@ -15,7 +12,7 @@ class ShodanProvider(BaseProvider):
Now uses session-specific API keys.
"""
def __init__(self, session_config=None):
def __init__(self, name=None, session_config=None):
"""Initialize Shodan provider with session-specific configuration."""
super().__init__(
name="shodan",

View File

@@ -7,3 +7,4 @@ urllib3>=2.0.0
dnspython>=2.4.2
gunicorn
redis
python-dotenv

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,57 @@
/**
* Graph visualization module for DNSRecon
* Handles network graph rendering using vis.js
* Handles network graph rendering using vis.js with proper large entity node hiding
*/
const contextMenuCSS = `
.graph-context-menu {
position: fixed;
z-index: 1000;
background: linear-gradient(135deg, #2a2a2a 0%, #1e1e1e 100%);
border: 1px solid #444;
border-radius: 6px;
box-shadow: 0 8px 25px rgba(0,0,0,0.6);
display: none;
font-family: 'Roboto Mono', monospace;
font-size: 0.9rem;
color: #c7c7c7;
min-width: 180px;
overflow: hidden;
}
.graph-context-menu ul {
list-style: none;
padding: 0.5rem 0;
margin: 0;
}
.graph-context-menu ul li {
padding: 0.75rem 1rem;
cursor: pointer;
transition: all 0.2s ease;
display: flex;
align-items: center;
gap: 0.5rem;
}
.graph-context-menu ul li:hover {
background: linear-gradient(135deg, #3a3a3a 0%, #2e2e2e 100%);
color: #00ff41;
}
.graph-context-menu .menu-icon {
font-size: 0.9rem;
width: 1.2rem;
text-align: center;
}
.graph-context-menu ul li:first-child {
border-top: none;
}
.graph-context-menu ul li:last-child {
border-bottom: none;
}
`;
class GraphManager {
constructor(containerId) {
@@ -12,6 +62,13 @@ class GraphManager {
this.isInitialized = false;
this.currentLayout = 'physics';
this.nodeInfoPopup = null;
this.contextMenu = null;
this.history = [];
this.filterPanel = null;
this.trueRootIds = new Set();
// Track large entity members for proper hiding
this.largeEntityMembers = new Set();
this.isScanning = false;
this.options = {
nodes: {
@@ -115,8 +172,14 @@ class GraphManager {
randomSeed: 2
}
};
if (typeof document !== 'undefined') {
const style = document.createElement('style');
style.textContent = contextMenuCSS;
document.head.appendChild(style);
}
this.createNodeInfoPopup();
this.createContextMenu();
document.body.addEventListener('click', () => this.hideContextMenu());
}
/**
@@ -129,6 +192,30 @@ class GraphManager {
document.body.appendChild(this.nodeInfoPopup);
}
/**
* Create context menu
*/
createContextMenu() {
// Remove existing context menu if it exists
const existing = document.getElementById('graph-context-menu');
if (existing) {
existing.remove();
}
this.contextMenu = document.createElement('div');
this.contextMenu.id = 'graph-context-menu';
this.contextMenu.className = 'graph-context-menu';
this.contextMenu.style.display = 'none';
// Prevent body click listener from firing when clicking the menu itself
this.contextMenu.addEventListener('click', (event) => {
event.stopPropagation();
});
document.body.appendChild(this.contextMenu);
console.log('Context menu created and added to body');
}
/**
* Initialize the network graph
*/
@@ -155,6 +242,7 @@ class GraphManager {
// Add graph controls
this.addGraphControls();
this.addFilterPanel();
console.log('Graph initialized successfully');
} catch (error) {
@@ -173,6 +261,8 @@ class GraphManager {
<button class="graph-control-btn" id="graph-fit" title="Fit to Screen">[FIT]</button>
<button class="graph-control-btn" id="graph-physics" title="Toggle Physics">[PHYSICS]</button>
<button class="graph-control-btn" id="graph-cluster" title="Cluster Nodes">[CLUSTER]</button>
<button class="graph-control-btn" id="graph-unhide" title="Unhide All">[UNHIDE]</button>
<button class="graph-control-btn" id="graph-revert" title="Revert Last Action">[REVERT]</button>
`;
this.container.appendChild(controlsContainer);
@@ -181,6 +271,14 @@ class GraphManager {
document.getElementById('graph-fit').addEventListener('click', () => this.fitView());
document.getElementById('graph-physics').addEventListener('click', () => this.togglePhysics());
document.getElementById('graph-cluster').addEventListener('click', () => this.toggleClustering());
document.getElementById('graph-unhide').addEventListener('click', () => this.unhideAll());
document.getElementById('graph-revert').addEventListener('click', () => this.revertLastAction());
}
addFilterPanel() {
this.filterPanel = document.createElement('div');
this.filterPanel.className = 'graph-filter-panel';
this.container.appendChild(this.filterPanel);
}
/**
@@ -189,8 +287,31 @@ class GraphManager {
setupNetworkEvents() {
if (!this.network) return;
// FIXED: Right-click context menu
this.container.addEventListener('contextmenu', (event) => {
event.preventDefault();
console.log('Right-click detected at:', event.offsetX, event.offsetY);
// Get coordinates relative to the canvas
const pointer = {
x: event.offsetX,
y: event.offsetY
};
const nodeId = this.network.getNodeAt(pointer);
console.log('Node at pointer:', nodeId);
if (nodeId) {
// Pass the original client event for positioning
this.showContextMenu(nodeId, event);
} else {
this.hideContextMenu();
}
});
// Node click event with details
this.network.on('click', (params) => {
this.hideContextMenu();
if (params.nodes.length > 0) {
const nodeId = params.nodes[0];
if (this.network.isCluster(nodeId)) {
@@ -216,14 +337,6 @@ class GraphManager {
}
});
// FIX: Comment out the problematic context menu handler
this.network.on('oncontext', (params) => {
params.event.preventDefault();
// if (params.nodes.length > 0) {
// this.showNodeContextMenu(params.pointer.DOM, params.nodes[0]);
// }
});
// Stabilization events with progress
this.network.on('stabilizationProgress', (params) => {
const progress = params.iterations / params.total;
@@ -239,6 +352,13 @@ class GraphManager {
console.log('Selected nodes:', params.nodes);
console.log('Selected edges:', params.edges);
});
// Click away to hide context menu
document.addEventListener('click', (e) => {
if (!this.contextMenu.contains(e.target)) {
this.hideContextMenu();
}
});
}
/**
@@ -256,21 +376,28 @@ class GraphManager {
this.initialize();
}
this.largeEntityMembers.clear();
const largeEntityMap = new Map();
graphData.nodes.forEach(node => {
if (node.type === 'large_entity' && node.attributes && Array.isArray(node.attributes.nodes)) {
node.attributes.nodes.forEach(nodeId => {
largeEntityMap.set(nodeId, node.id);
this.largeEntityMembers.add(nodeId);
});
}
});
const processedNodes = graphData.nodes.map(node => {
const processed = this.processNode(node);
if (largeEntityMap.has(node.id)) {
processed.hidden = true;
}
return processed;
const filteredNodes = graphData.nodes.filter(node => {
// Only include nodes that are NOT members of large entities
return !this.largeEntityMembers.has(node.id);
});
console.log(`Filtered ${graphData.nodes.length - filteredNodes.length} large entity member nodes from visualization`);
// Process only the filtered nodes
const processedNodes = filteredNodes.map(node => {
return this.processNode(node);
});
const mergedEdges = {};
@@ -315,6 +442,11 @@ class GraphManager {
this.nodes.update(processedNodes);
this.edges.update(processedEdges);
// After data is loaded, compute roots and apply filters
this.computeTrueRoots();
this.updateFilterControls();
this.applyAllFilters();
// Highlight new additions briefly
if (newNodes.length > 0 || newEdges.length > 0) {
setTimeout(() => this.highlightNewElements(newNodes, newEdges), 100);
@@ -326,6 +458,8 @@ class GraphManager {
}
console.log(`Graph updated: ${processedNodes.length} nodes, ${processedEdges.length} edges (${newNodes.length} new nodes, ${newEdges.length} new edges)`);
console.log(`Large entity members hidden: ${this.largeEntityMembers.size}`);
} catch (error) {
console.error('Failed to update graph:', error);
this.showError('Failed to update visualization');
@@ -380,7 +514,7 @@ class GraphManager {
// Single correlation value
const value = Array.isArray(values) && values.length > 0 ? values[0] : (metadata.value || 'Unknown');
const displayValue = typeof value === 'string' && value.length > 20 ? value.substring(0, 17) + '...' : value;
processedNode.label = `Corr: ${displayValue}`;
processedNode.label = `${displayValue}`;
processedNode.title = `Correlation: ${value}`;
}
}
@@ -412,8 +546,6 @@ class GraphManager {
}
};
return processedEdge;
}
@@ -460,7 +592,6 @@ class GraphManager {
return colors[nodeType] || '#ffffff';
}
/**
* Get node border color based on type
* @param {string} nodeType - Node type
@@ -850,6 +981,8 @@ class GraphManager {
clear() {
this.nodes.clear();
this.edges.clear();
this.history = [];
this.largeEntityMembers.clear(); // Clear large entity tracking
// Show placeholder
const placeholder = this.container.querySelector('.graph-placeholder');
@@ -870,59 +1003,590 @@ class GraphManager {
}
}
/* * @param {Set} excludedNodeIds - Node IDs to exclude from analysis (for simulation)
* @param {Set} excludedEdgeTypes - Edge types to exclude from traversal
* @param {Set} excludedNodeTypes - Node types to exclude from traversal
* @returns {Object} Analysis results with reachable/unreachable nodes
*/
analyzeGraphReachability(excludedNodeIds = new Set(), excludedEdgeTypes = new Set(), excludedNodeTypes = new Set()) {
console.log("Performing comprehensive reachability analysis...");
const analysis = {
reachableNodes: new Set(),
unreachableNodes: new Set(),
isolatedClusters: [],
affectedNodes: new Set()
};
if (this.nodes.length === 0) return analysis;
// Build adjacency list excluding specified elements
const adjacencyList = {};
this.nodes.getIds().forEach(id => {
if (!excludedNodeIds.has(id)) {
adjacencyList[id] = [];
}
});
this.edges.forEach(edge => {
const edgeType = edge.metadata?.relationship_type || '';
if (!excludedEdgeTypes.has(edgeType) &&
!excludedNodeIds.has(edge.from) &&
!excludedNodeIds.has(edge.to)) {
if (adjacencyList[edge.from]) {
adjacencyList[edge.from].push(edge.to);
}
}
});
// BFS traversal from true roots
const traversalQueue = [];
// Start from true roots that aren't excluded
this.trueRootIds.forEach(rootId => {
if (!excludedNodeIds.has(rootId)) {
const node = this.nodes.get(rootId);
if (node && !excludedNodeTypes.has(node.type)) {
if (!analysis.reachableNodes.has(rootId)) {
traversalQueue.push(rootId);
analysis.reachableNodes.add(rootId);
}
}
}
});
// BFS to find all reachable nodes
let queueIndex = 0;
while (queueIndex < traversalQueue.length) {
const currentNode = traversalQueue[queueIndex++];
for (const neighbor of (adjacencyList[currentNode] || [])) {
if (!analysis.reachableNodes.has(neighbor)) {
const node = this.nodes.get(neighbor);
if (node && !excludedNodeTypes.has(node.type)) {
analysis.reachableNodes.add(neighbor);
traversalQueue.push(neighbor);
}
}
}
}
// Identify unreachable nodes (maintaining forensic integrity)
Object.keys(adjacencyList).forEach(nodeId => {
if (!analysis.reachableNodes.has(nodeId)) {
analysis.unreachableNodes.add(nodeId);
}
});
// Find isolated clusters among unreachable nodes
analysis.isolatedClusters = this.findIsolatedClusters(
Array.from(analysis.unreachableNodes),
adjacencyList
);
console.log(`Reachability analysis complete:`, {
reachable: analysis.reachableNodes.size,
unreachable: analysis.unreachableNodes.size,
clusters: analysis.isolatedClusters.length
});
return analysis;
}
/**
* Get network statistics
* @returns {Object} Statistics object
* Find isolated clusters within a set of nodes
* Used for forensic analysis to identify disconnected subgraphs
*/
findIsolatedClusters(nodeIds, adjacencyList) {
const visited = new Set();
const clusters = [];
for (const nodeId of nodeIds) {
if (!visited.has(nodeId)) {
const cluster = [];
const stack = [nodeId];
while (stack.length > 0) {
const current = stack.pop();
if (!visited.has(current)) {
visited.add(current);
cluster.push(current);
// Add unvisited neighbors within the unreachable set
for (const neighbor of (adjacencyList[current] || [])) {
if (nodeIds.includes(neighbor) && !visited.has(neighbor)) {
stack.push(neighbor);
}
}
}
}
if (cluster.length > 0) {
clusters.push(cluster);
}
}
}
return clusters;
}
/**
* ENHANCED: Get comprehensive graph statistics with forensic information
* Updates the existing getStatistics() method
*/
getStatistics() {
return {
const basicStats = {
nodeCount: this.nodes.length,
edgeCount: this.edges.length,
//isStabilized: this.network ? this.network.isStabilized() : false
largeEntityMembersHidden: this.largeEntityMembers.size
};
// Add forensic statistics
const visibleNodes = this.nodes.get({ filter: node => !node.hidden });
const hiddenNodes = this.nodes.get({ filter: node => node.hidden });
return {
...basicStats,
forensicStatistics: {
visibleNodes: visibleNodes.length,
hiddenNodes: hiddenNodes.length,
trueRoots: this.trueRootIds.size,
integrityStatus: visibleNodes.length > 0 && this.trueRootIds.size > 0 ? 'INTACT' : 'COMPROMISED'
}
};
}
computeTrueRoots() {
this.trueRootIds.clear();
const allNodes = this.nodes.get({ returnType: 'Object' });
const allEdges = this.edges.get();
const inDegrees = {};
for (const nodeId in allNodes) {
inDegrees[nodeId] = 0;
}
allEdges.forEach(edge => {
if (inDegrees[edge.to] !== undefined) {
inDegrees[edge.to]++;
}
});
for (const nodeId in allNodes) {
if (inDegrees[nodeId] === 0) {
this.trueRootIds.add(nodeId);
}
}
console.log("Computed true roots:", this.trueRootIds);
}
updateFilterControls() {
if (!this.filterPanel) return;
const nodeTypes = new Set(this.nodes.get().map(n => n.type));
const edgeTypes = new Set(this.edges.get().map(e => e.metadata.relationship_type));
// Wrap both columns in a single container with vertical layout
let filterHTML = '<div class="filter-container">';
// Nodes section
filterHTML += '<div class="filter-column"><h4>Nodes</h4><div class="checkbox-group">';
nodeTypes.forEach(type => {
const label = type === 'correlation_object' ? 'latent correlations' : type;
const isChecked = type !== 'correlation_object';
filterHTML += `<label><input type="checkbox" data-filter-type="node" value="${type}" ${isChecked ? 'checked' : ''}> ${label}</label>`;
});
filterHTML += '</div></div>';
// Edges section
filterHTML += '<div class="filter-column"><h4>Edges</h4><div class="checkbox-group">';
edgeTypes.forEach(type => {
filterHTML += `<label><input type="checkbox" data-filter-type="edge" value="${type}" checked> ${type}</label>`;
});
filterHTML += '</div></div>';
filterHTML += '</div>'; // Close filter-container
this.filterPanel.innerHTML = filterHTML;
this.filterPanel.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
checkbox.addEventListener('change', () => this.applyAllFilters());
});
}
/**
* ENHANCED: Apply filters using consolidated reachability analysis
* Replaces the existing applyAllFilters() method
*/
applyAllFilters() {
console.log("Applying filters with enhanced reachability analysis...");
if (this.nodes.length === 0) return;
// Get filter criteria from UI
const excludedNodeTypes = new Set();
this.filterPanel?.querySelectorAll('input[data-filter-type="node"]:not(:checked)').forEach(cb => {
excludedNodeTypes.add(cb.value);
});
const excludedEdgeTypes = new Set();
this.filterPanel?.querySelectorAll('input[data-filter-type="edge"]:not(:checked)').forEach(cb => {
excludedEdgeTypes.add(cb.value);
});
// Perform comprehensive analysis
const analysis = this.analyzeGraphReachability(new Set(), excludedEdgeTypes, excludedNodeTypes);
// Apply visibility updates
const nodeUpdates = this.nodes.map(node => ({
id: node.id,
hidden: !analysis.reachableNodes.has(node.id)
}));
const edgeUpdates = this.edges.map(edge => ({
id: edge.id,
hidden: excludedEdgeTypes.has(edge.metadata?.relationship_type || '') ||
!analysis.reachableNodes.has(edge.from) ||
!analysis.reachableNodes.has(edge.to)
}));
this.nodes.update(nodeUpdates);
this.edges.update(edgeUpdates);
console.log(`Enhanced filters applied. Visible nodes: ${analysis.reachableNodes.size}`);
}
/**
* ENHANCED: Hide node with forensic integrity using reachability analysis
* Replaces the existing hideNodeAndOrphans() method
*/
hideNodeWithReachabilityAnalysis(nodeId) {
console.log(`Hiding node ${nodeId} with reachability analysis...`);
// Simulate hiding this node and analyze impact
const excludedNodes = new Set([nodeId]);
const analysis = this.analyzeGraphReachability(excludedNodes);
// Nodes that will become unreachable (should be hidden)
const nodesToHide = [nodeId, ...Array.from(analysis.unreachableNodes)];
// Store history for potential revert
const historyData = {
nodeIds: nodesToHide,
operation: 'hide_with_reachability',
timestamp: Date.now()
};
// Apply hiding with forensic documentation
const updates = nodesToHide.map(id => ({
id: id,
hidden: true,
forensicNote: `Hidden due to reachability analysis from ${nodeId}`
}));
this.nodes.update(updates);
this.addToHistory('hide', historyData);
console.log(`Forensic hide operation: ${nodesToHide.length} nodes hidden`, {
originalTarget: nodeId,
cascadeNodes: nodesToHide.length - 1,
isolatedClusters: analysis.isolatedClusters.length
});
return {
hiddenNodes: nodesToHide,
isolatedClusters: analysis.isolatedClusters
};
}
/**
* Apply filters to the graph
* @param {string} nodeType - The type of node to show ('all' for no filter)
* @param {number} minConfidence - The minimum confidence score for edges to be visible
* ENHANCED: Delete node with forensic integrity using reachability analysis
* Replaces the existing deleteNodeAndOrphans() method
*/
applyFilters(nodeType, minConfidence) {
console.log(`Applying filters: nodeType=${nodeType}, minConfidence=${minConfidence}`);
async deleteNodeWithReachabilityAnalysis(nodeId) {
console.log(`Deleting node ${nodeId} with reachability analysis...`);
const nodeUpdates = [];
const edgeUpdates = [];
// Simulate deletion and analyze impact
const excludedNodes = new Set([nodeId]);
const analysis = this.analyzeGraphReachability(excludedNodes);
const allNodes = this.nodes.get({ returnType: 'Object' });
const allEdges = this.edges.get();
// Nodes that will become unreachable (should be deleted)
const nodesToDelete = [nodeId, ...Array.from(analysis.unreachableNodes)];
// Determine which nodes are visible based on the nodeType filter
for (const nodeId in allNodes) {
const node = allNodes[nodeId];
const isVisible = (nodeType === 'all' || node.type === nodeType);
nodeUpdates.push({ id: nodeId, hidden: !isVisible });
// Collect forensic data before deletion
const historyData = {
nodes: nodesToDelete.map(id => this.nodes.get(id)).filter(Boolean),
edges: [],
operation: 'delete_with_reachability',
timestamp: Date.now(),
forensicAnalysis: {
originalTarget: nodeId,
cascadeNodes: nodesToDelete.length - 1,
isolatedClusters: analysis.isolatedClusters.length,
clusterSizes: analysis.isolatedClusters.map(cluster => cluster.length)
}
};
// Collect affected edges
nodesToDelete.forEach(id => {
const connectedEdgeIds = this.network.getConnectedEdges(id);
const edges = this.edges.get(connectedEdgeIds);
historyData.edges.push(...edges);
});
// Remove duplicates from edges
historyData.edges = Array.from(new Map(historyData.edges.map(e => [e.id, e])).values());
// Perform backend deletion with forensic logging
let operationFailed = false;
for (const targetNodeId of nodesToDelete) {
try {
const response = await fetch(`/api/graph/node/${targetNodeId}`, {
method: 'DELETE',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
forensicContext: {
operation: 'reachability_cascade_delete',
originalTarget: nodeId,
analysisTimestamp: historyData.timestamp
}
})
});
const result = await response.json();
if (!result.success) {
console.error(`Backend deletion failed for node ${targetNodeId}:`, result.error);
operationFailed = true;
break;
}
console.log(`Node ${targetNodeId} deleted from backend with forensic context`);
this.nodes.remove({ id: targetNodeId });
} catch (error) {
console.error(`API error during deletion of node ${targetNodeId}:`, error);
operationFailed = true;
break;
}
}
// Update nodes first to determine edge visibility
this.nodes.update(nodeUpdates);
// Handle operation results
if (!operationFailed) {
this.addToHistory('delete', historyData);
console.log(`Forensic delete operation completed:`, historyData.forensicAnalysis);
// Determine which edges are visible based on confidence and connected nodes
for (const edge of allEdges) {
const sourceNode = this.nodes.get(edge.from);
const targetNode = this.nodes.get(edge.to);
const confidence = edge.metadata ? edge.metadata.confidence_score : 0;
return {
success: true,
deletedNodes: nodesToDelete,
forensicAnalysis: historyData.forensicAnalysis
};
} else {
// Revert UI changes if backend operations failed - use update instead of add
console.log("Reverting UI changes due to backend failure");
this.nodes.update(historyData.nodes);
this.edges.update(historyData.edges);
const isVisible = confidence >= minConfidence &&
sourceNode && !sourceNode.hidden &&
targetNode && !targetNode.hidden;
edgeUpdates.push({ id: edge.id, hidden: !isVisible });
return {
success: false,
error: "Backend deletion failed, UI reverted"
};
}
this.edges.update(edgeUpdates);
console.log('Filters applied.');
}
/**
* Show context menu for a node
* @param {string} nodeId - The ID of the node
* @param {Event} event - The contextmenu event
*/
showContextMenu(nodeId, event) {
console.log('Showing context menu for node:', nodeId);
const node = this.nodes.get(nodeId);
// Create menu items
let menuItems = `
<ul>
<li data-action="focus" data-node-id="${nodeId}">
<span class="menu-icon">🎯</span>
<span>Focus on Node</span>
</li>
`;
// Add "Iterate Scan" option only for domain or IP nodes
if (node && (node.type === 'domain' || node.type === 'ip')) {
const disabled = this.isScanning ? 'disabled' : ''; // Check if scanning
const title = this.isScanning ? 'A scan is already in progress' : 'Iterate Scan (Add to Graph)'; // Add a title for disabled state
menuItems += `
<li data-action="iterate" data-node-id="${nodeId}" ${disabled} title="${title}">
<span class="menu-icon"></span>
<span>Iterate Scan (Add to Graph)</span>
</li>
`;
}
menuItems += `
<li data-action="hide" data-node-id="${nodeId}">
<span class="menu-icon">👁️‍🗨️</span>
<span>Hide Node</span>
</li>
<li data-action="delete" data-node-id="${nodeId}">
<span class="menu-icon">🗑️</span>
<span>Delete Node</span>
</li>
<li data-action="details" data-node-id="${nodeId}">
<span class="menu-icon"></span>
<span>Show Details</span>
</li>
</ul>
`;
this.contextMenu.innerHTML = menuItems;
// Position the menu
this.contextMenu.style.left = `${event.clientX}px`;
this.contextMenu.style.top = `${event.clientY}px`;
this.contextMenu.style.display = 'block';
// Ensure menu stays within viewport
const rect = this.contextMenu.getBoundingClientRect();
if (rect.right > window.innerWidth) {
this.contextMenu.style.left = `${event.clientX - rect.width}px`;
}
if (rect.bottom > window.innerHeight) {
this.contextMenu.style.top = `${event.clientY - rect.height}px`;
}
// Add event listeners to menu items
this.contextMenu.querySelectorAll('li').forEach(item => {
item.addEventListener('click', (e) => {
if (e.currentTarget.hasAttribute('disabled')) { // Prevent action if disabled
e.stopPropagation();
return;
}
e.stopPropagation();
const action = e.currentTarget.dataset.action;
const nodeId = e.currentTarget.dataset.nodeId;
console.log('Context menu action:', action, 'for node:', nodeId);
this.performContextMenuAction(action, nodeId);
this.hideContextMenu();
});
});
}
/**
* Hide the context menu
*/
hideContextMenu() {
if (this.contextMenu) {
this.contextMenu.style.display = 'none';
}
}
/**
* UPDATED: Enhanced context menu actions using new methods
* Updates the existing performContextMenuAction() method
*/
performContextMenuAction(action, nodeId) {
console.log('Performing enhanced action:', action, 'on node:', nodeId);
switch (action) {
case 'focus':
this.focusOnNode(nodeId);
break;
case 'iterate':
const event = new CustomEvent('iterateScan', {
detail: { nodeId }
});
document.dispatchEvent(event);
break;
case 'hide':
// Use enhanced method with reachability analysis
this.hideNodeWithReachabilityAnalysis(nodeId);
break;
case 'delete':
// Use enhanced method with reachability analysis
this.deleteNodeWithReachabilityAnalysis(nodeId);
break;
case 'details':
const node = this.nodes.get(nodeId);
if (node) {
this.showNodeDetails(node);
}
break;
default:
console.warn('Unknown action:', action);
}
}
/**
* Add an operation to the history stack
* @param {string} type - The type of operation ('hide', 'delete')
* @param {Object} data - The data needed to revert the operation
*/
addToHistory(type, data) {
this.history.push({ type, data });
}
/**
* Revert the last action
*/
async revertLastAction() {
const lastAction = this.history.pop();
if (!lastAction) {
console.log('No actions to revert.');
return;
}
switch (lastAction.type) {
case 'hide':
// Revert hiding nodes by un-hiding them
const updates = lastAction.data.nodeIds.map(id => ({ id: id, hidden: false }));
this.nodes.update(updates);
break;
case 'delete':
try {
const response = await fetch('/api/graph/revert', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(lastAction)
});
const result = await response.json();
if (result.success) {
console.log('Delete action reverted successfully on backend.');
// Re-add all nodes and edges from the history to the local view - use update instead of add
this.nodes.update(lastAction.data.nodes);
this.edges.update(lastAction.data.edges);
} else {
console.error('Failed to revert delete action on backend:', result.error);
// Push the action back onto the history stack if the API call failed
this.history.push(lastAction);
}
} catch (error) {
console.error('Error during revert API call:', error);
this.history.push(lastAction);
}
break;
}
}
/**
* Unhide all hidden nodes
*/
unhideAll() {
const allNodes = this.nodes.get({
filter: (node) => node.hidden === true
});
const updates = allNodes.map(node => ({ id: node.id, hidden: false }));
this.nodes.update(updates);
}
}
// Export for use in main.js

File diff suppressed because it is too large Load Diff

View File

@@ -32,19 +32,8 @@
<div class="form-container">
<div class="input-group">
<label for="target-domain">Target Domain</label>
<input type="text" id="target-domain" placeholder="example.com" autocomplete="off">
</div>
<div class="input-group">
<label for="max-depth">Recursion Depth</label>
<select id="max-depth">
<option value="1">Depth 1 - Direct relationships</option>
<option value="2" selected>Depth 2 - Recommended</option>
<option value="3">Depth 3 - Extended analysis</option>
<option value="4">Depth 4 - Deep reconnaissance</option>
<option value="5">Depth 5 - Maximum depth</option>
</select>
<label for="target-input">Target Domain or IP</label>
<input type="text" id="target-input" placeholder="example.com or 8.8.8.8" autocomplete="off">
</div>
<div class="button-group">
@@ -64,9 +53,9 @@
<span class="btn-icon">[EXPORT]</span>
<span>Download Results</span>
</button>
<button id="configure-api-keys" class="btn btn-secondary">
<button id="configure-settings" class="btn btn-secondary">
<span class="btn-icon">[API]</span>
<span>Configure API Keys</span>
<span>Settings</span>
</button>
</div>
</div>
@@ -90,46 +79,32 @@
<span class="status-label">Depth:</span>
<span id="depth-display" class="status-value">0/0</span>
</div>
<div class="status-row">
<span class="status-label">Progress:</span>
<span id="progress-display" class="status-value">0%</span>
</div>
<div class="status-row">
<span class="status-label">Indicators:</span>
<span id="indicators-display" class="status-value">0</span>
</div>
<div class="status-row">
<span class="status-label">Relationships:</span>
<span id="relationships-display" class="status-value">0</span>
</div>
</div>
<div class="progress-bar">
<div id="progress-fill" class="progress-fill"></div>
<div class="progress-container">
<div class="progress-info">
<span id="progress-label">Progress:</span>
<span id="progress-compact">0/0</span>
</div>
<div class="progress-bar">
<div id="progress-fill" class="progress-fill"></div>
</div>
<div class="progress-placeholder">
<span class="status-label">Reconnaissance on a large domain could take very long. Don´t try to scan Google.com.</span>
<br>
<span class="status-label">The main bottleneck is the request to crt.sh which is subject to harsh rate-limits.
The processing is done via a task-queue which operates by the pronciple of highest-priority-first: Long-running-tasks will be done last.</span>
</div>
</div>
</section>
<section class="visualization-panel">
<div class="panel-header">
<h2>Infrastructure Map</h2>
<div class="view-controls">
<div class="filter-group">
<label for="node-type-filter">Node Type:</label>
<select id="node-type-filter">
<option value="all">All</option>
<option value="domain">Domain</option>
<option value="ip">IP</option>
<option value="asn">ASN</option>
<option value="correlation_object">Correlation Object</option>
<option value="large_entity">Large Entity</option>
</select>
</div>
<div class="filter-group">
<label for="confidence-filter">Min Confidence:</label>
<input type="range" id="confidence-filter" min="0" max="1" step="0.1" value="0">
<span id="confidence-value">0</span>
</div>
</div>
</div>
<div id="network-graph" class="graph-container">
@@ -207,16 +182,28 @@
</div>
</div>
<div id="api-key-modal" class="modal">
<div id="settings-modal" class="modal">
<div class="modal-content">
<div class="modal-header">
<h3>Configure API Keys</h3>
<button id="api-key-modal-close" class="modal-close">[×]</button>
<h3>Settings</h3>
<button id="settings-modal-close" class="modal-close">[×]</button>
</div>
<div class="modal-body">
<p class="modal-description">
Enter your API keys for enhanced data providers. Keys are stored in memory for the current session only and are never saved to disk.
Configure scan settings and API keys. Keys are stored in memory for the current session only.
Only provide API-keys you dont use for anything else. Don´t enter an API-key if you don´t trust me (best practice would that you don´t).
</p>
<br>
<div class="input-group">
<label for="max-depth">Recursion Depth</label>
<select id="max-depth">
<option value="1">Depth 1 - Direct relationships</option>
<option value="2" selected>Depth 2 - Recommended</option>
<option value="3">Depth 3 - Extended analysis</option>
<option value="4">Depth 4 - Deep reconnaissance</option>
<option value="5">Depth 5 - Maximum depth</option>
</select>
</div>
<div id="api-key-inputs">
</div>
<div class="button-group" style="flex-direction: row; justify-content: flex-end;">
@@ -224,7 +211,7 @@
<span>Reset</span>
</button>
<button id="save-api-keys" class="btn btn-primary">
<span>Save Keys</span>
<span>Save API-Keys</span>
</button>
</div>
</div>

View File

@@ -48,3 +48,15 @@ def _is_valid_ip(ip: str) -> bool:
except (ValueError, AttributeError):
return False
def is_valid_target(target: str) -> bool:
"""
Checks if the target is a valid domain or IP address.
Args:
target: The target string to validate.
Returns:
True if the target is a valid domain or IP, False otherwise.
"""
return _is_valid_domain(target) or _is_valid_ip(target)