data-model #2
15
app.py
15
app.py
@ -463,12 +463,15 @@ def get_providers():
|
|||||||
# Get user-specific scanner
|
# Get user-specific scanner
|
||||||
user_session_id, scanner = get_user_scanner()
|
user_session_id, scanner = get_user_scanner()
|
||||||
|
|
||||||
if scanner:
|
if scanner and scanner.status == 'running':
|
||||||
# Updated debug print to be consistent with the new progress bar logic
|
status = scanner.get_scan_status()
|
||||||
completed_tasks = scanner.indicators_completed
|
currently_processing = status.get('currently_processing')
|
||||||
total_tasks = scanner.total_tasks_ever_enqueued
|
if currently_processing:
|
||||||
print(f"DEBUG: Task Progress - Completed: {completed_tasks}, Total Enqueued: {total_tasks}")
|
provider_name, target_item = currently_processing[0]
|
||||||
else:
|
print(f"DEBUG: RUNNING Task - Provider: {provider_name}, Target: {target_item}")
|
||||||
|
|
||||||
|
print(f"DEBUG: Task Queue Status - In Queue: {status.get('tasks_in_queue', 0)}, Completed: {status.get('tasks_completed', 0)}, Skipped: {status.get('tasks_skipped', 0)}, Rescheduled: {status.get('tasks_rescheduled', 0)}")
|
||||||
|
elif not scanner:
|
||||||
print("DEBUG: No active scanner session found.")
|
print("DEBUG: No active scanner session found.")
|
||||||
|
|
||||||
provider_info = scanner.get_provider_info()
|
provider_info = scanner.get_provider_info()
|
||||||
|
|||||||
@ -41,7 +41,7 @@ class GraphManager:
|
|||||||
self.correlation_index = {}
|
self.correlation_index = {}
|
||||||
# Compile regex for date filtering for efficiency
|
# Compile regex for date filtering for efficiency
|
||||||
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
|
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
|
||||||
self.EXCLUDED_KEYS = ['confidence', 'provider', 'timestamp', 'type','cert_validity_period_days','cert_source']
|
self.EXCLUDED_KEYS = ['crtsh_cert_validity_period_days','crtsh_cert_source','crtsh_cert_common_name']
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
"""Prepare GraphManager for pickling, excluding compiled regex."""
|
"""Prepare GraphManager for pickling, excluding compiled regex."""
|
||||||
|
|||||||
@ -396,7 +396,7 @@ class Scanner:
|
|||||||
if self._is_stop_requested():
|
if self._is_stop_requested():
|
||||||
print(f"Stop requested before processing {target_item}")
|
print(f"Stop requested before processing {target_item}")
|
||||||
break
|
break
|
||||||
self.currently_processing.add(target_item)
|
self.currently_processing.add(task_tuple)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.current_depth = depth
|
self.current_depth = depth
|
||||||
@ -444,7 +444,7 @@ class Scanner:
|
|||||||
self.total_tasks_ever_enqueued += 1
|
self.total_tasks_ever_enqueued += 1
|
||||||
finally:
|
finally:
|
||||||
with self.processing_lock:
|
with self.processing_lock:
|
||||||
self.currently_processing.discard(target_item)
|
self.currently_processing.discard(task_tuple)
|
||||||
|
|
||||||
if self._is_stop_requested():
|
if self._is_stop_requested():
|
||||||
print("Scan terminated due to stop request")
|
print("Scan terminated due to stop request")
|
||||||
@ -857,7 +857,11 @@ class Scanner:
|
|||||||
'graph_statistics': self.graph.get_statistics(),
|
'graph_statistics': self.graph.get_statistics(),
|
||||||
'task_queue_size': self.task_queue.qsize(),
|
'task_queue_size': self.task_queue.qsize(),
|
||||||
'currently_processing_count': currently_processing_count,
|
'currently_processing_count': currently_processing_count,
|
||||||
'currently_processing': currently_processing_list[:5]
|
'currently_processing': currently_processing_list[:5],
|
||||||
|
'tasks_in_queue': self.task_queue.qsize(),
|
||||||
|
'tasks_completed': self.indicators_completed,
|
||||||
|
'tasks_skipped': self.total_tasks_ever_enqueued - self.task_queue.qsize() - self.indicators_completed - self.tasks_re_enqueued,
|
||||||
|
'tasks_rescheduled': self.tasks_re_enqueued,
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"ERROR: Exception in get_scan_status: {e}")
|
print(f"ERROR: Exception in get_scan_status: {e}")
|
||||||
@ -876,7 +880,11 @@ class Scanner:
|
|||||||
'graph_statistics': {},
|
'graph_statistics': {},
|
||||||
'task_queue_size': 0,
|
'task_queue_size': 0,
|
||||||
'currently_processing_count': 0,
|
'currently_processing_count': 0,
|
||||||
'currently_processing': []
|
'currently_processing': [],
|
||||||
|
'tasks_in_queue': 0,
|
||||||
|
'tasks_completed': 0,
|
||||||
|
'tasks_skipped': 0,
|
||||||
|
'tasks_rescheduled': 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _initialize_provider_states(self, target: str) -> None:
|
def _initialize_provider_states(self, target: str) -> None:
|
||||||
|
|||||||
@ -93,7 +93,7 @@ class CrtShProvider(BaseProvider):
|
|||||||
|
|
||||||
def query_domain(self, domain: str) -> ProviderResult:
|
def query_domain(self, domain: str) -> ProviderResult:
|
||||||
"""
|
"""
|
||||||
Query crt.sh for certificates containing the domain with caching support.
|
Query crt.sh for certificates containing the domain with efficient, deduplicated caching.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
domain: Domain to investigate
|
domain: Domain to investigate
|
||||||
@ -110,35 +110,45 @@ class CrtShProvider(BaseProvider):
|
|||||||
cache_file = self._get_cache_file_path(domain)
|
cache_file = self._get_cache_file_path(domain)
|
||||||
cache_status = self._get_cache_status(cache_file)
|
cache_status = self._get_cache_status(cache_file)
|
||||||
|
|
||||||
processed_certificates = []
|
|
||||||
result = ProviderResult()
|
result = ProviderResult()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if cache_status == "fresh":
|
if cache_status == "fresh":
|
||||||
result = self._load_from_cache(cache_file)
|
result = self._load_from_cache(cache_file)
|
||||||
self.logger.logger.info(f"Using cached crt.sh data for {domain}")
|
self.logger.logger.info(f"Using fresh cached crt.sh data for {domain}")
|
||||||
|
|
||||||
else: # "stale" or "not_found"
|
else: # "stale" or "not_found"
|
||||||
raw_certificates = self._query_crtsh_api(domain)
|
# Query the API for the latest certificates
|
||||||
|
new_raw_certs = self._query_crtsh_api(domain)
|
||||||
|
|
||||||
if self._stop_event and self._stop_event.is_set():
|
if self._stop_event and self._stop_event.is_set():
|
||||||
return ProviderResult()
|
return ProviderResult()
|
||||||
|
|
||||||
# Process raw data into the application's expected format
|
# Combine with old data if cache is stale
|
||||||
current_processed_certs = [self._extract_certificate_metadata(cert) for cert in raw_certificates]
|
|
||||||
|
|
||||||
if cache_status == "stale":
|
if cache_status == "stale":
|
||||||
# Load existing and append new processed certs
|
old_raw_certs = self._load_raw_data_from_cache(cache_file)
|
||||||
existing_result = self._load_from_cache(cache_file)
|
combined_certs = old_raw_certs + new_raw_certs
|
||||||
result = self._merge_results(existing_result, current_processed_certs, domain)
|
|
||||||
self.logger.logger.info(f"Refreshed and merged cache for {domain}")
|
# Deduplicate the combined list
|
||||||
|
seen_ids = set()
|
||||||
|
unique_certs = []
|
||||||
|
for cert in combined_certs:
|
||||||
|
cert_id = cert.get('id')
|
||||||
|
if cert_id not in seen_ids:
|
||||||
|
unique_certs.append(cert)
|
||||||
|
seen_ids.add(cert_id)
|
||||||
|
|
||||||
|
raw_certificates_to_process = unique_certs
|
||||||
|
self.logger.logger.info(f"Refreshed and merged cache for {domain}. Total unique certs: {len(raw_certificates_to_process)}")
|
||||||
else: # "not_found"
|
else: # "not_found"
|
||||||
# Create new result from processed certs
|
raw_certificates_to_process = new_raw_certs
|
||||||
result = self._process_certificates_to_result(domain, raw_certificates)
|
|
||||||
self.logger.logger.info(f"Created fresh result for {domain} ({result.get_relationship_count()} relationships)")
|
# Process the clean, deduplicated list of certificates
|
||||||
|
result = self._process_certificates_to_result(domain, raw_certificates_to_process)
|
||||||
|
self.logger.logger.info(f"Created fresh result for {domain} ({result.get_relationship_count()} relationships)")
|
||||||
|
|
||||||
# Save the result to cache
|
# Save the new result and the raw data to the cache
|
||||||
self._save_result_to_cache(cache_file, result, domain)
|
self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain)
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
self.logger.logger.error(f"API query failed for {domain}: {e}")
|
self.logger.logger.error(f"API query failed for {domain}: {e}")
|
||||||
@ -200,12 +210,22 @@ class CrtShProvider(BaseProvider):
|
|||||||
self.logger.logger.error(f"Failed to load cached certificates from {cache_file_path}: {e}")
|
self.logger.logger.error(f"Failed to load cached certificates from {cache_file_path}: {e}")
|
||||||
return ProviderResult()
|
return ProviderResult()
|
||||||
|
|
||||||
def _save_result_to_cache(self, cache_file_path: Path, result: ProviderResult, domain: str) -> None:
|
def _load_raw_data_from_cache(self, cache_file_path: Path) -> List[Dict[str, Any]]:
|
||||||
"""Save processed crt.sh result to a cache file."""
|
"""Load only the raw certificate data from a cache file."""
|
||||||
|
try:
|
||||||
|
with open(cache_file_path, 'r') as f:
|
||||||
|
cache_content = json.load(f)
|
||||||
|
return cache_content.get("raw_certificates", [])
|
||||||
|
except (json.JSONDecodeError, FileNotFoundError):
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _save_result_to_cache(self, cache_file_path: Path, result: ProviderResult, raw_certificates: List[Dict[str, Any]], domain: str) -> None:
|
||||||
|
"""Save processed crt.sh result and raw data to a cache file."""
|
||||||
try:
|
try:
|
||||||
cache_data = {
|
cache_data = {
|
||||||
"domain": domain,
|
"domain": domain,
|
||||||
"last_upstream_query": datetime.now(timezone.utc).isoformat(),
|
"last_upstream_query": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"raw_certificates": raw_certificates, # Store the raw data for deduplication
|
||||||
"relationships": [
|
"relationships": [
|
||||||
{
|
{
|
||||||
"source_node": rel.source_node,
|
"source_node": rel.source_node,
|
||||||
@ -234,25 +254,6 @@ class CrtShProvider(BaseProvider):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.logger.warning(f"Failed to save cache file for {domain}: {e}")
|
self.logger.logger.warning(f"Failed to save cache file for {domain}: {e}")
|
||||||
|
|
||||||
def _merge_results(self, existing_result: ProviderResult, new_certificates: List[Dict[str, Any]], domain: str) -> ProviderResult:
|
|
||||||
"""Merge new certificate data with existing cached result."""
|
|
||||||
# Create a fresh result from the new certificates
|
|
||||||
new_result = self._process_certificates_to_result(domain, new_certificates)
|
|
||||||
|
|
||||||
# Simple merge strategy: combine all relationships and attributes
|
|
||||||
# In practice, you might want more sophisticated deduplication
|
|
||||||
merged_result = ProviderResult()
|
|
||||||
|
|
||||||
# Add existing relationships and attributes
|
|
||||||
merged_result.relationships.extend(existing_result.relationships)
|
|
||||||
merged_result.attributes.extend(existing_result.attributes)
|
|
||||||
|
|
||||||
# Add new relationships and attributes
|
|
||||||
merged_result.relationships.extend(new_result.relationships)
|
|
||||||
merged_result.attributes.extend(new_result.attributes)
|
|
||||||
|
|
||||||
return merged_result
|
|
||||||
|
|
||||||
def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]:
|
def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]:
|
||||||
"""Query crt.sh API for raw certificate data."""
|
"""Query crt.sh API for raw certificate data."""
|
||||||
url = f"{self.base_url}?q={quote(domain)}&output=json"
|
url = f"{self.base_url}?q={quote(domain)}&output=json"
|
||||||
@ -261,7 +262,12 @@ class CrtShProvider(BaseProvider):
|
|||||||
if not response or response.status_code != 200:
|
if not response or response.status_code != 200:
|
||||||
raise requests.exceptions.RequestException(f"crt.sh API returned status {response.status_code if response else 'None'}")
|
raise requests.exceptions.RequestException(f"crt.sh API returned status {response.status_code if response else 'None'}")
|
||||||
|
|
||||||
certificates = response.json()
|
try:
|
||||||
|
certificates = response.json()
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
self.logger.logger.error(f"crt.sh returned invalid JSON for {domain}")
|
||||||
|
return []
|
||||||
|
|
||||||
if not certificates:
|
if not certificates:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@ -324,7 +330,7 @@ class CrtShProvider(BaseProvider):
|
|||||||
result.add_relationship(
|
result.add_relationship(
|
||||||
source_node=domain,
|
source_node=domain,
|
||||||
target_node=discovered_domain,
|
target_node=discovered_domain,
|
||||||
relationship_type='san_certificate',
|
relationship_type='crtsh_san_certificate',
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
raw_data={'relationship_type': 'certificate_discovery'}
|
raw_data={'relationship_type': 'certificate_discovery'}
|
||||||
@ -333,7 +339,7 @@ class CrtShProvider(BaseProvider):
|
|||||||
self.log_relationship_discovery(
|
self.log_relationship_discovery(
|
||||||
source_node=domain,
|
source_node=domain,
|
||||||
target_node=discovered_domain,
|
target_node=discovered_domain,
|
||||||
relationship_type='san_certificate',
|
relationship_type='crtsh_san_certificate',
|
||||||
confidence_score=confidence,
|
confidence_score=confidence,
|
||||||
raw_data={'relationship_type': 'certificate_discovery'},
|
raw_data={'relationship_type': 'certificate_discovery'},
|
||||||
discovery_method="certificate_transparency_analysis"
|
discovery_method="certificate_transparency_analysis"
|
||||||
|
|||||||
@ -107,7 +107,7 @@ class DNSProvider(BaseProvider):
|
|||||||
result.add_relationship(
|
result.add_relationship(
|
||||||
source_node=ip,
|
source_node=ip,
|
||||||
target_node=hostname,
|
target_node=hostname,
|
||||||
relationship_type='ptr_record',
|
relationship_type='dns_ptr_record',
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
confidence=0.8,
|
confidence=0.8,
|
||||||
raw_data={
|
raw_data={
|
||||||
@ -125,7 +125,7 @@ class DNSProvider(BaseProvider):
|
|||||||
self.log_relationship_discovery(
|
self.log_relationship_discovery(
|
||||||
source_node=ip,
|
source_node=ip,
|
||||||
target_node=hostname,
|
target_node=hostname,
|
||||||
relationship_type='ptr_record',
|
relationship_type='dns_ptr_record',
|
||||||
confidence_score=0.8,
|
confidence_score=0.8,
|
||||||
raw_data={
|
raw_data={
|
||||||
'query_type': 'PTR',
|
'query_type': 'PTR',
|
||||||
@ -202,7 +202,7 @@ class DNSProvider(BaseProvider):
|
|||||||
'value': target,
|
'value': target,
|
||||||
'ttl': response.ttl
|
'ttl': response.ttl
|
||||||
}
|
}
|
||||||
relationship_type = f"{record_type.lower()}_record"
|
relationship_type = f"dns_{record_type.lower()}_record"
|
||||||
confidence = 0.8
|
confidence = 0.8
|
||||||
|
|
||||||
# Add relationship
|
# Add relationship
|
||||||
|
|||||||
@ -222,7 +222,7 @@ class ShodanProvider(BaseProvider):
|
|||||||
result.add_relationship(
|
result.add_relationship(
|
||||||
source_node=ip,
|
source_node=ip,
|
||||||
target_node=hostname,
|
target_node=hostname,
|
||||||
relationship_type='a_record',
|
relationship_type='shodan_a_record',
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
confidence=0.8,
|
confidence=0.8,
|
||||||
raw_data=data
|
raw_data=data
|
||||||
@ -230,7 +230,7 @@ class ShodanProvider(BaseProvider):
|
|||||||
self.log_relationship_discovery(
|
self.log_relationship_discovery(
|
||||||
source_node=ip,
|
source_node=ip,
|
||||||
target_node=hostname,
|
target_node=hostname,
|
||||||
relationship_type='a_record',
|
relationship_type='shodan_a_record',
|
||||||
confidence_score=0.8,
|
confidence_score=0.8,
|
||||||
raw_data=data,
|
raw_data=data,
|
||||||
discovery_method="shodan_host_lookup"
|
discovery_method="shodan_host_lookup"
|
||||||
@ -240,7 +240,7 @@ class ShodanProvider(BaseProvider):
|
|||||||
result.add_relationship(
|
result.add_relationship(
|
||||||
source_node=ip,
|
source_node=ip,
|
||||||
target_node=asn_name,
|
target_node=asn_name,
|
||||||
relationship_type='asn_membership',
|
relationship_type='shodan_asn_membership',
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
confidence=0.7,
|
confidence=0.7,
|
||||||
raw_data=data
|
raw_data=data
|
||||||
@ -248,7 +248,7 @@ class ShodanProvider(BaseProvider):
|
|||||||
self.log_relationship_discovery(
|
self.log_relationship_discovery(
|
||||||
source_node=ip,
|
source_node=ip,
|
||||||
target_node=asn_name,
|
target_node=asn_name,
|
||||||
relationship_type='asn_membership',
|
relationship_type='shodan_asn_membership',
|
||||||
confidence_score=0.7,
|
confidence_score=0.7,
|
||||||
raw_data=data,
|
raw_data=data,
|
||||||
discovery_method="shodan_asn_lookup"
|
discovery_method="shodan_asn_lookup"
|
||||||
@ -257,9 +257,9 @@ class ShodanProvider(BaseProvider):
|
|||||||
for port in value:
|
for port in value:
|
||||||
result.add_attribute(
|
result.add_attribute(
|
||||||
target_node=ip,
|
target_node=ip,
|
||||||
name='open_port',
|
name='shodan_open_port',
|
||||||
value=port,
|
value=port,
|
||||||
attr_type='network_info',
|
attr_type='shodan_network_info',
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
confidence=0.9
|
confidence=0.9
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user