modularize, shodan qs

This commit is contained in:
overcuriousity
2025-09-13 17:14:16 +02:00
parent 2925512a4d
commit 930fdca500
10 changed files with 275 additions and 147 deletions

View File

@@ -3,6 +3,8 @@
import threading
import traceback
import time
import os
import importlib
from typing import List, Set, Dict, Any, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future
from collections import defaultdict, deque
@@ -11,9 +13,7 @@ from datetime import datetime, timezone
from core.graph_manager import GraphManager, NodeType, RelationshipType
from core.logger import get_forensic_logger, new_session
from utils.helpers import _is_valid_ip, _is_valid_domain
from providers.crtsh_provider import CrtShProvider
from providers.dns_provider import DNSProvider
from providers.shodan_provider import ShodanProvider
from providers.base_provider import BaseProvider
class ScanStatus:
@@ -61,13 +61,6 @@ class Scanner:
self.max_workers = self.config.max_concurrent_requests
self.executor = None
# Provider eligibility mapping
self.provider_eligibility = {
'dns': {'domains': True, 'ips': True},
'crtsh': {'domains': True, 'ips': False},
'shodan': {'domains': True, 'ips': True}
}
# Initialize providers with session config
print("Calling _initialize_providers with session config...")
self._initialize_providers()
@@ -163,25 +156,27 @@ class Scanner:
self.providers = []
print("Initializing providers with session config...")
# Provider classes mapping
provider_classes = {
'dns': DNSProvider,
'crtsh': CrtShProvider,
'shodan': ShodanProvider
}
for provider_name, provider_class in provider_classes.items():
if self.config.is_provider_enabled(provider_name):
provider_dir = os.path.join(os.path.dirname(__file__), '..', 'providers')
for filename in os.listdir(provider_dir):
if filename.endswith('_provider.py') and not filename.startswith('base'):
module_name = f"providers.{filename[:-3]}"
try:
provider = provider_class(session_config=self.config)
if provider.is_available():
provider.set_stop_event(self.stop_event)
self.providers.append(provider)
print(f"{provider_name.title()} provider initialized successfully for session")
else:
print(f"{provider_name.title()} provider is not available")
module = importlib.import_module(module_name)
for attribute_name in dir(module):
attribute = getattr(module, attribute_name)
if isinstance(attribute, type) and issubclass(attribute, BaseProvider) and attribute is not BaseProvider:
provider_class = attribute
provider_name = provider_class(session_config=self.config).get_name()
if self.config.is_provider_enabled(provider_name):
provider = provider_class(session_config=self.config)
if provider.is_available():
provider.set_stop_event(self.stop_event)
self.providers.append(provider)
print(f"{provider.get_display_name()} provider initialized successfully for session")
else:
print(f"{provider.get_display_name()} provider is not available")
except Exception as e:
print(f"✗ Failed to initialize {provider_name.title()} provider: {e}")
print(f"✗ Failed to initialize provider from {filename}: {e}")
traceback.print_exc()
print(f"Initialized {len(self.providers)} providers for session")
@@ -417,13 +412,11 @@ class Scanner:
target_key = 'ips' if is_ip else 'domains'
for provider in self.providers:
provider_name = provider.get_name()
if provider_name in self.provider_eligibility:
if self.provider_eligibility[provider_name][target_key]:
if not self._already_queried_provider(target, provider_name):
eligible.append(provider)
else:
print(f"Skipping {provider_name} for {target} - already queried")
if provider.get_eligibility().get(target_key):
if not self._already_queried_provider(target, provider.get_name()):
eligible.append(provider)
else:
print(f"Skipping {provider.get_name()} for {target} - already queried")
return eligible
@@ -740,4 +733,36 @@ class Scanner:
stats = {}
for provider in self.providers:
stats[provider.get_name()] = provider.get_statistics()
return stats
return stats
def get_provider_info(self) -> Dict[str, Dict[str, Any]]:
    """Collect metadata for every provider class found on disk.

    Scans the ``providers`` directory located next to this module's parent
    directory, imports each ``*_provider.py`` file whose name does not start
    with ``base``, and instantiates every ``BaseProvider`` subclass exposed
    by that module using the current session config.

    Returns:
        A dict keyed by each provider's ``get_name()`` value. Every entry
        holds ``display_name``, ``requires_api_key``, ``statistics``,
        ``enabled`` and ``rate_limit``. Statistics come from the matching
        instance in ``self.providers`` when one exists, otherwise from the
        freshly created instance.

    Any exception raised while importing or inspecting a module is printed
    together with its traceback; processing of that module stops and the
    scan moves on to the next file.
    """
    providers_path = os.path.join(os.path.dirname(__file__), '..', 'providers')
    summary = {}

    for entry in os.listdir(providers_path):
        # Only provider implementation files; the base module is excluded.
        if not entry.endswith('_provider.py') or entry.startswith('base'):
            continue

        dotted_path = f"providers.{entry[:-3]}"
        try:
            loaded = importlib.import_module(dotted_path)
            for member_name in dir(loaded):
                candidate = getattr(loaded, member_name)
                if not isinstance(candidate, type):
                    continue
                if not issubclass(candidate, BaseProvider) or candidate is BaseProvider:
                    continue

                # A throwaway instance is needed because name and metadata
                # are exposed through instance methods.
                probe = candidate(session_config=self.config)
                key = probe.get_name()

                # Look for an already-initialised provider with the same
                # name so its statistics are reported instead.
                active = None
                for running in self.providers:
                    if running.get_name() == key:
                        active = running
                        break

                display_name = probe.get_display_name()
                needs_key = probe.requires_api_key()
                if active:
                    statistics = active.get_statistics()
                else:
                    statistics = probe.get_statistics()
                enabled = self.config.is_provider_enabled(key)
                rate_limit = self.config.get_rate_limit(key)

                summary[key] = {
                    'display_name': display_name,
                    'requires_api_key': needs_key,
                    'statistics': statistics,
                    'enabled': enabled,
                    'rate_limit': rate_limit,
                }
        except Exception as exc:
            print(f"✗ Failed to get info for provider from {entry}: {exc}")
            traceback.print_exc()

    return summary