some fixes for UX, correlation engine
This commit is contained in:
parent
3ee23c9d05
commit
bcd79ae2f5
@ -605,11 +605,6 @@ class Scanner:
|
||||
processing_key = (provider_name, target_item)
|
||||
self.currently_processing.discard(processing_key)
|
||||
|
||||
# PHASE 2: Run correlations on all discovered nodes
|
||||
if not self._is_stop_requested():
|
||||
print(f"\n=== PHASE 2: Running correlation analysis ===")
|
||||
self._run_correlation_phase(max_depth, processed_tasks)
|
||||
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
self.status = ScanStatus.FAILED
|
||||
@ -633,6 +628,10 @@ class Scanner:
|
||||
else:
|
||||
self.status = ScanStatus.COMPLETED
|
||||
|
||||
if self.status in [ScanStatus.COMPLETED, ScanStatus.STOPPED]:
|
||||
print(f"\n=== PHASE 2: Running correlation analysis ===")
|
||||
self._run_correlation_phase(max_depth, processed_tasks)
|
||||
|
||||
self.status_logger_stop_event.set()
|
||||
if self.status_logger_thread and self.status_logger_thread.is_alive():
|
||||
self.status_logger_thread.join(timeout=2.0)
|
||||
|
||||
@ -27,6 +27,7 @@ class CorrelationProvider(BaseProvider):
|
||||
'cert_validity_period_days',
|
||||
'cert_issuer_name',
|
||||
'cert_entry_timestamp',
|
||||
'cert_serial_number', # useless
|
||||
'cert_not_before',
|
||||
'cert_not_after',
|
||||
'dns_ttl',
|
||||
|
||||
@ -2,43 +2,22 @@
|
||||
|
||||
import json
|
||||
import re
|
||||
import psycopg2
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Set, Optional
|
||||
from urllib.parse import quote
|
||||
from datetime import datetime, timezone
|
||||
import requests
|
||||
from psycopg2 import pool
|
||||
|
||||
from .base_provider import BaseProvider
|
||||
from core.provider_result import ProviderResult
|
||||
from utils.helpers import _is_valid_domain
|
||||
from core.logger import get_forensic_logger
|
||||
|
||||
# --- Global Instance for PostgreSQL Connection Pool ---
|
||||
# This pool will be created once per worker process and is not part of the
|
||||
# CrtShProvider instance, thus avoiding pickling errors.
|
||||
db_pool = None
|
||||
try:
|
||||
db_pool = psycopg2.pool.SimpleConnectionPool(
|
||||
1, 5,
|
||||
host='crt.sh',
|
||||
port=5432,
|
||||
user='guest',
|
||||
dbname='certwatch',
|
||||
sslmode='prefer',
|
||||
connect_timeout=60
|
||||
)
|
||||
# Use a generic logger here as this is at the module level
|
||||
get_forensic_logger().logger.info("crt.sh: Global PostgreSQL connection pool created successfully.")
|
||||
except Exception as e:
|
||||
get_forensic_logger().logger.warning(f"crt.sh: Failed to create global DB connection pool: {e}. Will fall back to HTTP API.")
|
||||
|
||||
|
||||
class CrtShProvider(BaseProvider):
|
||||
"""
|
||||
Provider for querying crt.sh certificate transparency database.
|
||||
FIXED: Now properly creates domain and CA nodes instead of large entities.
|
||||
REMOVED: All PostgreSQL logic to rely exclusively on the HTTP API for stability.
|
||||
Returns standardized ProviderResult objects with caching support.
|
||||
"""
|
||||
|
||||
@ -53,7 +32,7 @@ class CrtShProvider(BaseProvider):
|
||||
self.base_url = "https://crt.sh/"
|
||||
self._stop_event = None
|
||||
|
||||
# Initialize cache directory (separate from BaseProvider's HTTP cache)
|
||||
# Initialize cache directory
|
||||
self.domain_cache_dir = Path('cache') / 'crtsh'
|
||||
self.domain_cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@ -116,8 +95,7 @@ class CrtShProvider(BaseProvider):
|
||||
|
||||
def query_domain(self, domain: str) -> ProviderResult:
|
||||
"""
|
||||
FIXED: Query crt.sh for certificates containing the domain.
|
||||
Now properly creates domain and CA nodes instead of large entities.
|
||||
Query crt.sh for certificates containing the domain via HTTP API.
|
||||
|
||||
Args:
|
||||
domain: Domain to investigate
|
||||
@ -140,60 +118,56 @@ class CrtShProvider(BaseProvider):
|
||||
if cache_status == "fresh":
|
||||
result = self._load_from_cache(cache_file)
|
||||
self.logger.logger.info(f"Using fresh cached crt.sh data for {domain}")
|
||||
return result
|
||||
|
||||
else: # "stale" or "not_found"
|
||||
# Query the API for the latest certificates
|
||||
new_raw_certs = self._query_crtsh(domain)
|
||||
# For "stale" or "not_found", we must query the API.
|
||||
new_raw_certs = self._query_crtsh_api(domain)
|
||||
|
||||
if self._stop_event and self._stop_event.is_set():
|
||||
return ProviderResult()
|
||||
if self._stop_event and self._stop_event.is_set():
|
||||
return ProviderResult()
|
||||
|
||||
# Combine with old data if cache is stale
|
||||
if cache_status == "stale":
|
||||
old_raw_certs = self._load_raw_data_from_cache(cache_file)
|
||||
combined_certs = old_raw_certs + new_raw_certs
|
||||
# Combine with old data if cache is stale
|
||||
if cache_status == "stale":
|
||||
old_raw_certs = self._load_raw_data_from_cache(cache_file)
|
||||
combined_certs = old_raw_certs + new_raw_certs
|
||||
|
||||
# Deduplicate the combined list
|
||||
seen_ids = set()
|
||||
unique_certs = []
|
||||
for cert in combined_certs:
|
||||
cert_id = cert.get('id')
|
||||
if cert_id not in seen_ids:
|
||||
unique_certs.append(cert)
|
||||
seen_ids.add(cert_id)
|
||||
# Deduplicate the combined list
|
||||
seen_ids = set()
|
||||
unique_certs = []
|
||||
for cert in combined_certs:
|
||||
cert_id = cert.get('id')
|
||||
if cert_id not in seen_ids:
|
||||
unique_certs.append(cert)
|
||||
seen_ids.add(cert_id)
|
||||
|
||||
raw_certificates_to_process = unique_certs
|
||||
self.logger.logger.info(f"Refreshed and merged cache for {domain}. Total unique certs: {len(raw_certificates_to_process)}")
|
||||
else: # "not_found"
|
||||
raw_certificates_to_process = new_raw_certs
|
||||
raw_certificates_to_process = unique_certs
|
||||
self.logger.logger.info(f"Refreshed and merged cache for {domain}. Total unique certs: {len(raw_certificates_to_process)}")
|
||||
else: # "not_found"
|
||||
raw_certificates_to_process = new_raw_certs
|
||||
|
||||
# FIXED: Process certificates to create proper domain and CA nodes
|
||||
result = self._process_certificates_to_result_fixed(domain, raw_certificates_to_process)
|
||||
self.logger.logger.info(f"Created fresh result for {domain} ({result.get_relationship_count()} relationships)")
|
||||
result = self._process_certificates_to_result_fixed(domain, raw_certificates_to_process)
|
||||
self.logger.logger.info(f"Created fresh result for {domain} ({result.get_relationship_count()} relationships)")
|
||||
|
||||
# Save the new result and the raw data to the cache
|
||||
self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain)
|
||||
# Save the new result and the raw data to the cache
|
||||
self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain)
|
||||
|
||||
except (requests.exceptions.RequestException, psycopg2.Error) as e:
|
||||
except requests.exceptions.RequestException as e:
|
||||
self.logger.logger.error(f"Upstream query failed for {domain}: {e}")
|
||||
if cache_status != "not_found":
|
||||
result = self._load_from_cache(cache_file)
|
||||
self.logger.logger.warning(f"Using stale cache for {domain} due to API failure.")
|
||||
else:
|
||||
raise e # Re-raise if there's no cache to fall back on
|
||||
# **BUG FIX:** Always re-raise the exception. This signals a failure to the
|
||||
# scanner, allowing its retry logic to handle the transient error.
|
||||
raise e
|
||||
|
||||
return result
|
||||
|
||||
def query_ip(self, ip: str) -> ProviderResult:
|
||||
"""
|
||||
Query crt.sh for certificates containing the IP address.
|
||||
Note: crt.sh doesn't typically index by IP, so this returns empty results.
|
||||
crt.sh does not support IP-based certificate queries effectively via its API.
|
||||
|
||||
Args:
|
||||
ip: IP address to investigate
|
||||
|
||||
Returns:
|
||||
Empty ProviderResult (crt.sh doesn't support IP-based certificate queries effectively)
|
||||
Empty ProviderResult
|
||||
"""
|
||||
return ProviderResult()
|
||||
|
||||
@ -278,58 +252,6 @@ class CrtShProvider(BaseProvider):
|
||||
except Exception as e:
|
||||
self.logger.logger.warning(f"Failed to save cache file for {domain}: {e}")
|
||||
|
||||
def _query_crtsh(self, domain: str) -> List[Dict[str, Any]]:
|
||||
"""Query crt.sh, trying the database first and falling back to the API."""
|
||||
global db_pool
|
||||
if db_pool:
|
||||
try:
|
||||
self.logger.logger.info(f"crt.sh: Attempting DB query for {domain}")
|
||||
return self._query_crtsh_db(domain)
|
||||
except psycopg2.Error as e:
|
||||
self.logger.logger.warning(f"crt.sh: DB query failed for {domain}: {e}. Falling back to HTTP API.")
|
||||
return self._query_crtsh_api(domain)
|
||||
else:
|
||||
self.logger.logger.info(f"crt.sh: No DB connection pool. Using HTTP API for {domain}")
|
||||
return self._query_crtsh_api(domain)
|
||||
|
||||
def _query_crtsh_db(self, domain: str) -> List[Dict[str, Any]]:
|
||||
"""Query crt.sh database for raw certificate data."""
|
||||
global db_pool
|
||||
conn = db_pool.getconn()
|
||||
try:
|
||||
with conn.cursor() as cursor:
|
||||
query = """
|
||||
SELECT
|
||||
c.id,
|
||||
x509_serialnumber(c.certificate) as serial_number,
|
||||
x509_notbefore(c.certificate) as not_before,
|
||||
x509_notafter(c.certificate) as not_after,
|
||||
c.issuer_ca_id,
|
||||
ca.name as issuer_name,
|
||||
x509_commonname(c.certificate) as common_name,
|
||||
identities(c.certificate)::text as name_value
|
||||
FROM certificate c
|
||||
LEFT JOIN ca ON c.issuer_ca_id = ca.id
|
||||
WHERE identities(c.certificate) @@ plainto_tsquery(%s)
|
||||
ORDER BY c.id DESC
|
||||
LIMIT 5000;
|
||||
"""
|
||||
cursor.execute(query, (domain,))
|
||||
|
||||
results = []
|
||||
columns = [desc[0] for desc in cursor.description]
|
||||
for row in cursor.fetchall():
|
||||
row_dict = dict(zip(columns, row))
|
||||
if row_dict.get('not_before'):
|
||||
row_dict['not_before'] = row_dict['not_before'].isoformat()
|
||||
if row_dict.get('not_after'):
|
||||
row_dict['not_after'] = row_dict['not_after'].isoformat()
|
||||
results.append(row_dict)
|
||||
self.logger.logger.info(f"crt.sh: DB query for {domain} returned {len(results)} records.")
|
||||
return results
|
||||
finally:
|
||||
db_pool.putconn(conn)
|
||||
|
||||
def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]:
|
||||
"""Query crt.sh API for raw certificate data."""
|
||||
url = f"{self.base_url}?q={quote(domain)}&output=json"
|
||||
@ -351,8 +273,7 @@ class CrtShProvider(BaseProvider):
|
||||
|
||||
def _process_certificates_to_result_fixed(self, query_domain: str, certificates: List[Dict[str, Any]]) -> ProviderResult:
|
||||
"""
|
||||
FIXED: Process certificates to create proper domain and CA nodes.
|
||||
Now creates individual domain nodes instead of large entities.
|
||||
Process certificates to create proper domain and CA nodes.
|
||||
"""
|
||||
result = ProviderResult()
|
||||
|
||||
@ -375,6 +296,7 @@ class CrtShProvider(BaseProvider):
|
||||
processed_issuers = set()
|
||||
|
||||
for i, cert_data in enumerate(certificates):
|
||||
# Check for stop event inside the loop to make it responsive.
|
||||
if i % 10 == 0 and self._stop_event and self._stop_event.is_set():
|
||||
self.logger.logger.info(f"CrtSh processing cancelled at certificate {i} for domain: {query_domain}")
|
||||
break
|
||||
@ -383,10 +305,9 @@ class CrtShProvider(BaseProvider):
|
||||
cert_domains = self._extract_domains_from_certificate(cert_data)
|
||||
all_discovered_domains.update(cert_domains)
|
||||
|
||||
# FIXED: Create CA nodes for certificate issuers (not as domain metadata)
|
||||
# Create CA nodes for certificate issuers
|
||||
issuer_name = self._parse_issuer_organization(cert_data.get('issuer_name', ''))
|
||||
if issuer_name and issuer_name not in processed_issuers:
|
||||
# Create relationship from query domain to CA
|
||||
result.add_relationship(
|
||||
source_node=query_domain,
|
||||
target_node=issuer_name,
|
||||
@ -403,7 +324,6 @@ class CrtShProvider(BaseProvider):
|
||||
if not _is_valid_domain(cert_domain):
|
||||
continue
|
||||
|
||||
# Add certificate attributes to the domain
|
||||
for key, value in cert_metadata.items():
|
||||
if value is not None:
|
||||
result.add_attribute(
|
||||
@ -416,12 +336,12 @@ class CrtShProvider(BaseProvider):
|
||||
metadata={'certificate_id': cert_data.get('id')}
|
||||
)
|
||||
|
||||
# Check for stop event before creating final relationships.
|
||||
if self._stop_event and self._stop_event.is_set():
|
||||
self.logger.logger.info(f"CrtSh query cancelled before relationship creation for domain: {query_domain}")
|
||||
return result
|
||||
|
||||
# FIXED: Create selective relationships to avoid large entities
|
||||
# Only create relationships to domains that are closely related
|
||||
# Create selective relationships to avoid large entities
|
||||
for discovered_domain in all_discovered_domains:
|
||||
if discovered_domain == query_domain:
|
||||
continue
|
||||
@ -429,8 +349,6 @@ class CrtShProvider(BaseProvider):
|
||||
if not _is_valid_domain(discovered_domain):
|
||||
continue
|
||||
|
||||
# FIXED: Only create relationships for domains that share a meaningful connection
|
||||
# This prevents creating too many relationships that trigger large entity creation
|
||||
if self._should_create_relationship(query_domain, discovered_domain):
|
||||
confidence = self._calculate_domain_relationship_confidence(
|
||||
query_domain, discovered_domain, [], all_discovered_domains
|
||||
@ -459,18 +377,14 @@ class CrtShProvider(BaseProvider):
|
||||
|
||||
def _should_create_relationship(self, source_domain: str, target_domain: str) -> bool:
|
||||
"""
|
||||
FIXED: Determine if a relationship should be created between two domains.
|
||||
This helps avoid creating too many relationships that trigger large entity creation.
|
||||
Determine if a relationship should be created between two domains.
|
||||
"""
|
||||
# Always create relationships for subdomains
|
||||
if target_domain.endswith(f'.{source_domain}') or source_domain.endswith(f'.{target_domain}'):
|
||||
return True
|
||||
|
||||
# Create relationships for domains that share a common parent (up to 2 levels)
|
||||
source_parts = source_domain.split('.')
|
||||
target_parts = target_domain.split('.')
|
||||
|
||||
# Check if they share the same root domain (last 2 parts)
|
||||
if len(source_parts) >= 2 and len(target_parts) >= 2:
|
||||
source_root = '.'.join(source_parts[-2:])
|
||||
target_root = '.'.join(target_parts[-2:])
|
||||
@ -504,7 +418,6 @@ class CrtShProvider(BaseProvider):
|
||||
metadata['is_currently_valid'] = self._is_cert_valid(cert_data)
|
||||
metadata['expires_soon'] = (not_after - datetime.now(timezone.utc)).days <= 30
|
||||
|
||||
# Keep raw date format or convert to standard format
|
||||
metadata['not_before'] = not_before.isoformat()
|
||||
metadata['not_after'] = not_after.isoformat()
|
||||
|
||||
@ -586,14 +499,12 @@ class CrtShProvider(BaseProvider):
|
||||
"""Extract all domains from certificate data."""
|
||||
domains = set()
|
||||
|
||||
# Extract from common name
|
||||
common_name = cert_data.get('common_name', '')
|
||||
if common_name:
|
||||
cleaned_cn = self._clean_domain_name(common_name)
|
||||
if cleaned_cn:
|
||||
domains.update(cleaned_cn)
|
||||
|
||||
# Extract from name_value field (contains SANs)
|
||||
name_value = cert_data.get('name_value', '')
|
||||
if name_value:
|
||||
for line in name_value.split('\n'):
|
||||
@ -640,7 +551,6 @@ class CrtShProvider(BaseProvider):
|
||||
"""Calculate confidence score for domain relationship based on various factors."""
|
||||
base_confidence = 0.9
|
||||
|
||||
# Adjust confidence based on domain relationship context
|
||||
relationship_context = self._determine_relationship_context(domain2, domain1)
|
||||
|
||||
if relationship_context == 'exact_match':
|
||||
@ -672,12 +582,10 @@ class CrtShProvider(BaseProvider):
|
||||
"""
|
||||
cert_count = len(certificates)
|
||||
|
||||
# Heuristic 1: Check if the number of certs hits a known hard limit.
|
||||
if cert_count >= 10000:
|
||||
return f"Result likely truncated; received {cert_count} certificates, which may be the maximum limit."
|
||||
|
||||
# Heuristic 2: Check if all returned certificates are old.
|
||||
if cert_count > 1000: # Only apply this for a reasonable number of certs
|
||||
if cert_count > 1000:
|
||||
latest_expiry = None
|
||||
for cert in certificates:
|
||||
try:
|
||||
|
||||
@ -474,6 +474,7 @@ input[type="text"]:focus, select:focus {
|
||||
flex-wrap: wrap;
|
||||
gap: 0.75rem;
|
||||
align-items: center;
|
||||
max-height: 3rem;
|
||||
}
|
||||
|
||||
.legend-item {
|
||||
|
||||
@ -329,7 +329,7 @@ class DNSReconApp {
|
||||
console.log(`Scan started for ${target} with depth ${maxDepth}`);
|
||||
|
||||
// Start polling immediately with faster interval for responsiveness
|
||||
this.startPolling(1000);
|
||||
this.startPolling();
|
||||
|
||||
// Force an immediate status update
|
||||
console.log('Forcing immediate status update...');
|
||||
@ -738,7 +738,7 @@ class DNSReconApp {
|
||||
this.setUIState('scanning', task_queue_size);
|
||||
this.showSuccess('Scan is running');
|
||||
// Increase polling frequency for active scans
|
||||
this.startPolling(1000); // Poll every 1 second for running scans
|
||||
this.startPolling(5000); // Poll every 5 second for running scans
|
||||
this.updateConnectionStatus('active');
|
||||
break;
|
||||
|
||||
@ -2039,6 +2039,27 @@ class DNSReconApp {
|
||||
setTimeout(() => this.updateGraph(), 500);
|
||||
}
|
||||
|
||||
// Immediately update the modal view
|
||||
if (this.graphManager) {
|
||||
const largeEntityNode = this.graphManager.nodes.get(largeEntityId);
|
||||
if (largeEntityNode && largeEntityNode.attributes) {
|
||||
|
||||
// Find and update the 'nodes' attribute
|
||||
const nodesAttribute = largeEntityNode.attributes.find(attr => attr.name === 'nodes');
|
||||
if (nodesAttribute && Array.isArray(nodesAttribute.value)) {
|
||||
nodesAttribute.value = nodesAttribute.value.filter(id => id !== nodeId);
|
||||
}
|
||||
|
||||
// Find and update the 'count' attribute
|
||||
const countAttribute = largeEntityNode.attributes.find(attr => attr.name === 'count');
|
||||
if (countAttribute) {
|
||||
countAttribute.value = (countAttribute.value || 0) - 1;
|
||||
}
|
||||
|
||||
// Re-render the modal with the updated data
|
||||
this.showNodeModal(largeEntityNode);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new Error(response.error || 'Extraction failed on the server.');
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user