Compare commits


No commits in common. "3ee23c9d05c7b73c0ea2eab13d65d928016b66ea" and "1558731c1ce9a229913408b9775a46f7c37b508d" have entirely different histories.

6 changed files with 351 additions and 366 deletions

View File

@@ -114,6 +114,36 @@ class GraphManager:
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
"""
Removes a node from a large entity's internal lists and updates its count.
This prepares the large entity for the node's promotion to a regular node.
"""
if not self.graph.has_node(large_entity_id):
return False
node_data = self.graph.nodes[large_entity_id]
attributes = node_data.get('attributes', [])
# Find the 'nodes' attribute dictionary in the list
nodes_attr = next((attr for attr in attributes if attr.get('name') == 'nodes'), None)
# Remove from the list of member nodes
if nodes_attr and 'value' in nodes_attr and isinstance(nodes_attr['value'], list) and node_id_to_extract in nodes_attr['value']:
nodes_attr['value'].remove(node_id_to_extract)
# Find the 'count' attribute and update it
count_attr = next((attr for attr in attributes if attr.get('name') == 'count'), None)
if count_attr:
count_attr['value'] = len(nodes_attr['value'])
else:
# This can happen if the node was already extracted, which is not an error.
print(f"Warning: Node {node_id_to_extract} not found in the 'nodes' list of {large_entity_id}.")
return True # Proceed as if successful
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
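A minimal sketch of the attribute-list update this method performs, with illustrative member names (the 'nodes' and 'count' shapes match the attributes created for large entity nodes):

# Sketch: the 'nodes' value list shrinks, and 'count' is recomputed from it.
attributes = [
    {"name": "count", "value": 3, "type": "statistic"},
    {"name": "nodes", "value": ["a.example.com", "b.example.com", "c.example.com"], "type": "metadata"},
]
nodes_attr = next(a for a in attributes if a.get("name") == "nodes")
nodes_attr["value"].remove("b.example.com")
count_attr = next(a for a in attributes if a.get("name") == "count")
count_attr["value"] = len(nodes_attr["value"])  # 3 -> 2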
def remove_node(self, node_id: str) -> bool:
"""Remove a node and its connected edges from the graph."""
if not self.graph.has_node(node_id):

View File

@@ -761,7 +761,7 @@ class Scanner:
def _process_provider_task(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]:
"""
Manages the entire process for a given target and provider.
This version is generalized to handle all relationships dynamically.
FIXED: Don't enqueue correlation tasks during normal processing.
"""
if self._is_stop_requested():
return set(), set(), False
@@ -773,6 +773,7 @@
self._initialize_provider_states(target)
new_targets = set()
large_entity_members = set()
provider_successful = True
try:
@@ -781,17 +782,19 @@
if provider_result is None:
provider_successful = False
elif not self._is_stop_requested():
# Pass all relationships to be processed
discovered, is_large_entity = self._process_provider_result_unified(
target, provider, provider_result, depth
)
new_targets.update(discovered)
if is_large_entity:
large_entity_members.update(discovered)
else:
new_targets.update(discovered)
except Exception as e:
provider_successful = False
self._log_provider_error(target, provider.get_name(), str(e))
return new_targets, set(), provider_successful
return new_targets, large_entity_members, provider_successful
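The substantive fix in this hunk is the return value: members routed into a large entity are reported in their own set rather than discarded, so the caller can distinguish them from ordinary new targets. Illustrative values for the returned tuple:

# Hypothetical return from _process_provider_task after a large entity is created:
new_targets = {"mail.example.com", "192.0.2.10"}           # re-enqueued for scanning
large_entity_members = {"a.example.com", "b.example.com"}  # grouped under the container node
provider_successful = True
result = (new_targets, large_entity_members, provider_successful)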
def _execute_provider_query(self, provider: BaseProvider, target: str, is_ip: bool) -> Optional[ProviderResult]:
"""
@@ -821,158 +824,73 @@
self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
return None
def _create_large_entity_from_result(self, source_node: str, provider_name: str,
provider_result: ProviderResult, depth: int) -> Tuple[str, Set[str]]:
"""
Creates a large entity node, tags all member nodes, and returns its ID and members.
"""
members = {rel.target_node for rel in provider_result.relationships
if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)}
if not members:
return "", set()
large_entity_id = f"le_{provider_name}_{source_node}"
self.graph.add_node(
node_id=large_entity_id,
node_type=NodeType.LARGE_ENTITY,
attributes=[
{"name": "count", "value": len(members), "type": "statistic"},
{"name": "source_provider", "value": provider_name, "type": "metadata"},
{"name": "discovery_depth", "value": depth, "type": "metadata"},
{"name": "nodes", "value": list(members), "type": "metadata"}
],
description=f"A collection of {len(members)} nodes discovered from {source_node} via {provider_name}."
)
for member_id in members:
node_type = NodeType.IP if _is_valid_ip(member_id) else NodeType.DOMAIN
self.graph.add_node(
node_id=member_id,
node_type=node_type,
metadata={'large_entity_id': large_entity_id}
)
return large_entity_id, members
def extract_node_from_large_entity(self, large_entity_id: str, node_id: str) -> bool:
"""
Removes a node from a large entity, allowing it to be processed normally.
"""
if not self.graph.graph.has_node(node_id):
return False
node_data = self.graph.graph.nodes[node_id]
metadata = node_data.get('metadata', {})
if metadata.get('large_entity_id') == large_entity_id:
# Remove the large entity tag
del metadata['large_entity_id']
self.graph.add_node(node_id, NodeType(node_data['type']), metadata=metadata)
# Re-enqueue the node for full processing
is_ip = _is_valid_ip(node_id)
eligible_providers = self._get_eligible_providers(node_id, is_ip, False)
for provider in eligible_providers:
provider_name = provider.get_name()
priority = self._get_priority(provider_name)
# Use current depth of the large entity if available, else 0
depth = 0
if self.graph.graph.has_node(large_entity_id):
le_attrs = self.graph.graph.nodes[large_entity_id].get('attributes', [])
depth_attr = next((a for a in le_attrs if a['name'] == 'discovery_depth'), None)
if depth_attr:
depth = depth_attr['value']
self.task_queue.put((time.time(), priority, (provider_name, node_id, depth)))
self.total_tasks_ever_enqueued += 1
return True
return False
def _process_provider_result_unified(self, target: str, provider: BaseProvider,
provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
"""
Process a unified ProviderResult object to update the graph.
This version dynamically re-routes edges to a large entity container.
Handles large entity creation while ensuring all underlying nodes and edges are
added to the graph data model for a complete dataset.
"""
provider_name = provider.get_name()
discovered_targets = set()
large_entity_id = ""
large_entity_members = set()
if self._is_stop_requested():
return discovered_targets, False
eligible_rel_count = sum(
# Check if a large entity should be created based on the count of domain/IP relationships
eligible_relationship_count = sum(
1 for rel in provider_result.relationships if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)
)
is_large_entity = eligible_rel_count > self.config.large_entity_threshold
is_large_entity = eligible_relationship_count > self.config.large_entity_threshold
if is_large_entity:
large_entity_id, large_entity_members = self._create_large_entity_from_result(
# Create the large entity node and get the set of its members
large_entity_members = self._create_large_entity_from_provider_result(
target, provider_name, provider_result, current_depth
)
# Process ALL relationships to build the complete underlying data model
for i, relationship in enumerate(provider_result.relationships):
if i % 5 == 0 and self._is_stop_requested():
break
source_node_id = relationship.source_node
target_node_id = relationship.target_node
source_node = relationship.source_node
target_node = relationship.target_node
# Determine visual source and target, substituting with large entity ID if necessary
visual_source = large_entity_id if source_node_id in large_entity_members else source_node_id
visual_target = large_entity_id if target_node_id in large_entity_members else target_node_id
# Prevent self-loops on the large entity node
if visual_source == visual_target:
continue
# Determine node types for the actual nodes
source_type = NodeType.IP if _is_valid_ip(source_node_id) else NodeType.DOMAIN
# Determine node types
source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
if provider_name == 'shodan' and relationship.relationship_type == 'shodan_isp':
target_type = NodeType.ISP
elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer':
target_type = NodeType.CA
elif provider_name == 'correlation':
target_type = NodeType.CORRELATION_OBJECT
elif _is_valid_ip(target_node_id):
elif _is_valid_ip(target_node):
target_type = NodeType.IP
else:
target_type = NodeType.DOMAIN
max_depth_reached = current_depth >= self.max_depth
# Add actual nodes to the graph (they might be hidden by the UI)
self.graph.add_node(source_node_id, source_type)
self.graph.add_node(target_node_id, target_type, metadata={'max_depth_reached': max_depth_reached})
# Add the visual edge to the graph
# Add all nodes and edges to the graph's data model.
# The frontend will handle the visual re-routing for large entity members.
self.graph.add_node(source_node, source_type)
self.graph.add_node(target_node, target_type, metadata={'max_depth_reached': max_depth_reached})
self.graph.add_edge(
visual_source, visual_target,
source_node, target_node,
relationship.relationship_type,
relationship.confidence,
provider_name,
relationship.raw_data
)
if (_is_valid_domain(target_node_id) or _is_valid_ip(target_node_id)) and not max_depth_reached:
if target_node_id not in large_entity_members:
discovered_targets.add(target_node_id)
if large_entity_members:
self.logger.logger.info(f"Enqueuing DNS and Correlation for {len(large_entity_members)} members of {large_entity_id}")
for member in large_entity_members:
for provider_name_to_run in ['dns', 'correlation']:
p_instance = next((p for p in self.providers if p.get_name() == provider_name_to_run), None)
if p_instance and p_instance.get_eligibility().get('domains' if _is_valid_domain(member) else 'ips'):
priority = self._get_priority(provider_name_to_run)
self.task_queue.put((time.time(), priority, (provider_name_to_run, member, current_depth)))
self.total_tasks_ever_enqueued += 1
# Add all discovered domains/IPs to be considered for further processing
if (_is_valid_domain(target_node) or _is_valid_ip(target_node)) and not max_depth_reached:
discovered_targets.add(target_node)
# Process all attributes and add them to the corresponding nodes
attributes_by_node = defaultdict(list)
for attribute in provider_result.attributes:
attr_dict = {
@@ -991,6 +909,65 @@ class Scanner:
return discovered_targets, is_large_entity
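The large-entity decision above only counts relationships whose target parses as a domain or IP. A runnable sketch with naive stand-in validators (the real code uses _is_valid_domain and _is_valid_ip from utils.helpers):

import ipaddress

def is_ip(value: str) -> bool:  # naive stand-in for _is_valid_ip
    try:
        ipaddress.ip_address(value)
        return True
    except ValueError:
        return False

def is_domain(value: str) -> bool:  # naive stand-in for _is_valid_domain
    return "." in value and not is_ip(value)

targets = ["a.example.com", "192.0.2.1", "Example CA Inc"]  # illustrative targets
large_entity_threshold = 1                                  # hypothetical config value
eligible = sum(1 for t in targets if is_domain(t) or is_ip(t))
is_large_entity = eligible > large_entity_threshold         # True: 2 > 1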
def _create_large_entity_from_provider_result(self, source: str, provider_name: str,
provider_result: ProviderResult, current_depth: int) -> Set[str]:
"""
Create a large entity node and connect it to the source and any shared
non-member nodes like CAs or ISPs.
"""
entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}"
members = {
rel.target_node for rel in provider_result.relationships
if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)
}
if not members:
return set()
first_member = next(iter(members))
node_type = 'ip' if _is_valid_ip(first_member) else 'domain'
attributes_dict = {
'count': len(members),
'nodes': list(members),
'node_type': node_type,
'source_provider': provider_name,
'discovery_depth': current_depth,
'threshold_exceeded': self.config.large_entity_threshold,
}
attributes_list = [
{
"name": key, "value": value, "type": "large_entity_info",
"provider": provider_name, "confidence": 0.9, "metadata": {}
} for key, value in attributes_dict.items()
]
description = f'Large entity created due to {len(members)} relationships from {provider_name}'
self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes_list, description=description)
# Add a representative edge from the source to the large entity
if provider_result.relationships:
rep_rel = provider_result.relationships[0]
self.graph.add_edge(source, entity_id, rep_rel.relationship_type, 0.9, provider_name,
{'large_entity_info': f'Contains {len(members)} {node_type}s'})
# Create edges from the large entity to shared non-member nodes (e.g., CAs, ISPs)
processed_targets = set()
for rel in provider_result.relationships:
if rel.source_node in members and rel.target_node not in members:
if rel.target_node not in processed_targets:
self.graph.add_edge(
entity_id, rel.target_node, rel.relationship_type, rel.confidence,
rel.provider, rel.raw_data
)
processed_targets.add(rel.target_node)
self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(members)} targets from {provider_name}")
return members
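For reference, this is the attribute list a large entity node ends up with, shown for a hypothetical three-member result from the crtsh provider. Note that Python's built-in hash() used in entity_id is randomized per process for strings unless PYTHONHASHSEED is set, so the id is stable within a run but not across runs.

# Illustrative output of the attributes_list construction above (assumed values):
attributes_dict = {
    "count": 3,
    "nodes": ["a.example.com", "b.example.com", "c.example.com"],
    "node_type": "domain",
    "source_provider": "crtsh",
    "discovery_depth": 1,
    "threshold_exceeded": 2,   # hypothetical large_entity_threshold
}
attributes_list = [
    {"name": key, "value": value, "type": "large_entity_info",
     "provider": "crtsh", "confidence": 0.9, "metadata": {}}
    for key, value in attributes_dict.items()
]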
def stop_scan(self) -> bool:
"""Request immediate scan termination with proper cleanup."""
try:
@@ -1018,6 +995,127 @@ class Scanner:
traceback.print_exc()
return False
def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
"""
Extracts a node from a large entity, restores ALL of its original connections,
and re-queues it for scanning.
"""
if not self.graph.graph.has_node(large_entity_id):
return False
# Extract the node from the large entity's internal list
success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract)
if not success:
return False
# Restore all incoming and outgoing edges for the extracted node
# These edges already exist in the graph data model; this ensures they are "activated"
# for the frontend.
for u, v, data in self.graph.graph.in_edges(node_id_to_extract, data=True):
self.graph.add_edge(u, v, data.get('relationship_type'), data.get('confidence_score'),
data.get('source_provider'), data.get('raw_data'))
for u, v, data in self.graph.graph.out_edges(node_id_to_extract, data=True):
self.graph.add_edge(u, v, data.get('relationship_type'), data.get('confidence_score'),
data.get('source_provider'), data.get('raw_data'))
# Re-queue the extracted node for further scanning if it is a domain or IP
is_ip = _is_valid_ip(node_id_to_extract)
is_domain = _is_valid_domain(node_id_to_extract)
if is_domain or is_ip:
large_entity_attributes = self.graph.graph.nodes[large_entity_id].get('attributes', [])
discovery_depth_attr = next((attr for attr in large_entity_attributes if attr.get('name') == 'discovery_depth'), None)
current_depth = discovery_depth_attr['value'] if discovery_depth_attr else 0
eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False)
for provider in eligible_providers:
# Exclude DNS and correlation providers from re-processing
if provider.get_name() not in ['dns', 'correlation']:
provider_name = provider.get_name()
priority = self._get_priority(provider_name)
self.task_queue.put((time.time(), priority, (provider_name, node_id_to_extract, current_depth)))
self.total_tasks_ever_enqueued += 1
if self.status != ScanStatus.RUNNING:
self.status = ScanStatus.RUNNING
self._update_session_state()
if not self.scan_thread or not self.scan_thread.is_alive():
self.scan_thread = threading.Thread(
target=self._execute_scan,
args=(self.current_target, self.max_depth),
daemon=True
)
self.scan_thread.start()
else:
self.logger.logger.info(f"Extracted non-scannable node {node_id_to_extract} of type {self.graph.graph.nodes[node_id_to_extract].get('type', 'unknown')}")
return True
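Re-queueing uses the same tuple shape as the rest of the scanner: a (timestamp, priority, payload) entry, so a priority queue orders tasks by enqueue time with priority as a tie-breaker. A self-contained sketch with assumed values:

import queue
import time

task_queue = queue.PriorityQueue()
priority = 50  # hypothetical value from self._get_priority(provider_name)
task_queue.put((time.time(), priority, ("crtsh", "b.example.com", 1)))

enqueued_at, prio, (provider_name, target, depth) = task_queue.get()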
def _determine_extracted_node_type(self, node_id: str, large_entity_id: str) -> NodeType:
"""
FIXED: Determine the correct node type for a node being extracted from a large entity.
Uses multiple strategies to ensure accurate type detection.
"""
from utils.helpers import _is_valid_ip, _is_valid_domain
# Strategy 1: Check if node already exists in graph with a type
if self.graph.has_node(node_id):
existing_type = self.graph.nodes[node_id].get('type')
if existing_type:
try:
return NodeType(existing_type)
except ValueError:
pass
# Strategy 2: Look for existing relationships to this node to infer type
for source, target, edge_data in self.graph.edges(data=True):
if target == node_id:
rel_type = edge_data.get('relationship_type', '')
provider = edge_data.get('source_provider', '')
# CA nodes from certificate issuer relationships
if provider == 'crtsh' and rel_type == 'crtsh_cert_issuer':
return NodeType.CA
# ISP nodes from Shodan
if provider == 'shodan' and rel_type == 'shodan_isp':
return NodeType.ISP
# Correlation objects
if rel_type.startswith('corr_'):
return NodeType.CORRELATION_OBJECT
if source == node_id:
rel_type = edge_data.get('relationship_type', '')
provider = edge_data.get('source_provider', '')
# Source nodes in cert issuer relationships are CAs
if provider == 'crtsh' and rel_type == 'crtsh_cert_issuer':
return NodeType.CA
# Strategy 3: Format-based detection (fallback)
if _is_valid_ip(node_id):
return NodeType.IP
elif _is_valid_domain(node_id):
return NodeType.DOMAIN
# Strategy 4: Check large entity context
if self.graph.has_node(large_entity_id):
large_entity_data = self.graph.nodes[large_entity_id]
attributes = large_entity_data.get('attributes', [])
node_type_attr = next((attr for attr in attributes if attr.get('name') == 'node_type'), None)
if node_type_attr:
entity_node_type = node_type_attr.get('value', 'domain')
if entity_node_type == 'ip':
return NodeType.IP
else:
return NodeType.DOMAIN
# Final fallback
return NodeType.DOMAIN
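Strategy 2 above amounts to a small lookup from (provider, relationship_type) pairs to node types; a compact sketch of that mapping, with string labels standing in for the NodeType members:

from typing import Optional

EDGE_TYPE_HINTS = {
    ("crtsh", "crtsh_cert_issuer"): "ca",
    ("shodan", "shodan_isp"): "isp",
}

def infer_from_edge(provider: str, rel_type: str) -> Optional[str]:
    # Correlation relationship types take precedence, mirroring the checks above.
    if rel_type.startswith("corr_"):
        return "correlation_object"
    return EDGE_TYPE_HINTS.get((provider, rel_type))

print(infer_from_edge("crtsh", "crtsh_cert_issuer"))  # -> ca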
def _update_session_state(self) -> None:
"""
Update the scanner state in Redis for GUI updates.
@@ -1088,19 +1186,8 @@ class Scanner:
eligible = []
target_key = 'ips' if is_ip else 'domains'
# Check if the target is part of a large entity
is_in_large_entity = False
if self.graph.graph.has_node(target):
metadata = self.graph.graph.nodes[target].get('metadata', {})
if 'large_entity_id' in metadata:
is_in_large_entity = True
for provider in self.providers:
try:
# If in large entity, only allow dns and correlation providers
if is_in_large_entity and provider.get_name() not in ['dns', 'correlation']:
continue
# Check if provider supports this target type
if not provider.get_eligibility().get(target_key, False):
continue

View File

@@ -2,37 +2,15 @@
import json
import re
import psycopg2
from pathlib import Path
from typing import List, Dict, Any, Set, Optional
from typing import List, Dict, Any, Set
from urllib.parse import quote
from datetime import datetime, timezone
import requests
from psycopg2 import pool
from .base_provider import BaseProvider
from core.provider_result import ProviderResult
from utils.helpers import _is_valid_domain
from core.logger import get_forensic_logger
# --- Global Instance for PostgreSQL Connection Pool ---
# This pool will be created once per worker process and is not part of the
# CrtShProvider instance, thus avoiding pickling errors.
db_pool = None
try:
db_pool = psycopg2.pool.SimpleConnectionPool(
1, 5,
host='crt.sh',
port=5432,
user='guest',
dbname='certwatch',
sslmode='prefer',
connect_timeout=60
)
# Use a generic logger here as this is at the module level
get_forensic_logger().logger.info("crt.sh: Global PostgreSQL connection pool created successfully.")
except Exception as e:
get_forensic_logger().logger.warning(f"crt.sh: Failed to create global DB connection pool: {e}. Will fall back to HTTP API.")
class CrtShProvider(BaseProvider):
@@ -143,7 +121,7 @@ class CrtShProvider(BaseProvider):
else: # "stale" or "not_found"
# Query the API for the latest certificates
new_raw_certs = self._query_crtsh(domain)
new_raw_certs = self._query_crtsh_api(domain)
if self._stop_event and self._stop_event.is_set():
return ProviderResult()
@@ -174,8 +152,8 @@ class CrtShProvider(BaseProvider):
# Save the new result and the raw data to the cache
self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain)
except (requests.exceptions.RequestException, psycopg2.Error) as e:
self.logger.logger.error(f"Upstream query failed for {domain}: {e}")
except requests.exceptions.RequestException as e:
self.logger.logger.error(f"API query failed for {domain}: {e}")
if cache_status != "not_found":
result = self._load_from_cache(cache_file)
self.logger.logger.warning(f"Using stale cache for {domain} due to API failure.")
@@ -278,58 +256,6 @@ class CrtShProvider(BaseProvider):
except Exception as e:
self.logger.logger.warning(f"Failed to save cache file for {domain}: {e}")
def _query_crtsh(self, domain: str) -> List[Dict[str, Any]]:
"""Query crt.sh, trying the database first and falling back to the API."""
global db_pool
if db_pool:
try:
self.logger.logger.info(f"crt.sh: Attempting DB query for {domain}")
return self._query_crtsh_db(domain)
except psycopg2.Error as e:
self.logger.logger.warning(f"crt.sh: DB query failed for {domain}: {e}. Falling back to HTTP API.")
return self._query_crtsh_api(domain)
else:
self.logger.logger.info(f"crt.sh: No DB connection pool. Using HTTP API for {domain}")
return self._query_crtsh_api(domain)
def _query_crtsh_db(self, domain: str) -> List[Dict[str, Any]]:
"""Query crt.sh database for raw certificate data."""
global db_pool
conn = db_pool.getconn()
try:
with conn.cursor() as cursor:
query = """
SELECT
c.id,
x509_serialnumber(c.certificate) as serial_number,
x509_notbefore(c.certificate) as not_before,
x509_notafter(c.certificate) as not_after,
c.issuer_ca_id,
ca.name as issuer_name,
x509_commonname(c.certificate) as common_name,
identities(c.certificate)::text as name_value
FROM certificate c
LEFT JOIN ca ON c.issuer_ca_id = ca.id
WHERE identities(c.certificate) @@ plainto_tsquery(%s)
ORDER BY c.id DESC
LIMIT 5000;
"""
cursor.execute(query, (domain,))
results = []
columns = [desc[0] for desc in cursor.description]
for row in cursor.fetchall():
row_dict = dict(zip(columns, row))
if row_dict.get('not_before'):
row_dict['not_before'] = row_dict['not_before'].isoformat()
if row_dict.get('not_after'):
row_dict['not_after'] = row_dict['not_after'].isoformat()
results.append(row_dict)
self.logger.logger.info(f"crt.sh: DB query for {domain} returned {len(results)} records.")
return results
finally:
db_pool.putconn(conn)
def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]:
"""Query crt.sh API for raw certificate data."""
url = f"{self.base_url}?q={quote(domain)}&output=json"
@@ -360,17 +286,6 @@ class CrtShProvider(BaseProvider):
self.logger.logger.info(f"CrtSh processing cancelled before processing for domain: {query_domain}")
return result
incompleteness_warning = self._check_for_incomplete_data(query_domain, certificates)
if incompleteness_warning:
result.add_attribute(
target_node=query_domain,
name="crtsh_data_warning",
value=incompleteness_warning,
attr_type='metadata',
provider=self.name,
confidence=1.0
)
all_discovered_domains = set()
processed_issuers = set()
@@ -542,8 +457,6 @@ class CrtShProvider(BaseProvider):
raise ValueError("Empty date string")
try:
if isinstance(date_string, datetime):
return date_string.replace(tzinfo=timezone.utc)
if date_string.endswith('Z'):
return datetime.fromisoformat(date_string[:-1]).replace(tzinfo=timezone.utc)
elif '+' in date_string or date_string.endswith('UTC'):
@@ -665,29 +578,3 @@ class CrtShProvider(BaseProvider):
return 'parent_domain'
else:
return 'related_domain'
def _check_for_incomplete_data(self, domain: str, certificates: List[Dict[str, Any]]) -> Optional[str]:
"""
Analyzes the certificate list to heuristically detect if the data from crt.sh is incomplete.
"""
cert_count = len(certificates)
# Heuristic 1: Check if the number of certs hits a known hard limit.
if cert_count >= 10000:
return f"Result likely truncated; received {cert_count} certificates, which may be the maximum limit."
# Heuristic 2: Check if all returned certificates are old.
if cert_count > 1000: # Only apply this for a reasonable number of certs
latest_expiry = None
for cert in certificates:
try:
not_after = self._parse_certificate_date(cert.get('not_after'))
if latest_expiry is None or not_after > latest_expiry:
latest_expiry = not_after
except (ValueError, TypeError):
continue
if latest_expiry and (datetime.now(timezone.utc) - latest_expiry).days > 365:
return f"Incomplete data suspected: The latest certificate expired more than a year ago ({latest_expiry.strftime('%Y-%m-%d')})."
return None

View File

@@ -8,4 +8,3 @@ dnspython
gunicorn
redis
python-dotenv
psycopg2-binary

View File

@@ -1,4 +1,3 @@
// dnsrecon-reduced/static/js/graph.js
/**
* Graph visualization module for DNSRecon
* Handles network graph rendering using vis.js with proper large entity node hiding
@@ -363,60 +362,100 @@ class GraphManager {
}
try {
// Initialize if not already done
if (!this.isInitialized) {
this.initialize();
}
this.initialTargetIds = new Set(graphData.initial_targets || []);
// Check if we have actual data to display
const hasData = graphData.nodes.length > 0 || graphData.edges.length > 0;
// Handle placeholder visibility
const placeholder = this.container.querySelector('.graph-placeholder');
if (placeholder) {
placeholder.style.display = hasData ? 'none' : 'flex';
}
if (!hasData) {
this.nodes.clear();
this.edges.clear();
return;
if (hasData) {
placeholder.style.display = 'none';
} else {
placeholder.style.display = 'flex';
// Early return if no data to process
return;
}
}
const nodeMap = new Map(graphData.nodes.map(node => [node.id, node]));
this.largeEntityMembers.clear();
const largeEntityMap = new Map();
// Filter out hidden nodes before processing for rendering
const filteredNodes = graphData.nodes.filter(node =>
!(node.metadata && node.metadata.large_entity_id)
);
graphData.nodes.forEach(node => {
if (node.type === 'large_entity' && node.attributes) {
const nodesAttribute = this.findAttributeByName(node.attributes, 'nodes');
if (nodesAttribute && Array.isArray(nodesAttribute.value)) {
nodesAttribute.value.forEach(nodeId => {
largeEntityMap.set(nodeId, node.id);
this.largeEntityMembers.add(nodeId);
});
}
}
});
const processedNodes = graphData.nodes.map(node => {
const filteredNodes = graphData.nodes.filter(node => {
return !this.largeEntityMembers.has(node.id) || node.type === 'large_entity';
});
console.log(`Filtered ${graphData.nodes.length - filteredNodes.length} large entity member nodes from visualization`);
// Process nodes with proper certificate coloring
const processedNodes = filteredNodes.map(node => {
const processed = this.processNode(node);
if (node.metadata && node.metadata.large_entity_id) {
processed.hidden = true;
// Apply certificate-based coloring here in frontend
if (node.type === 'domain' && Array.isArray(node.attributes)) {
const certInfo = this.analyzeCertificateInfo(node.attributes);
if (certInfo.hasExpiredOnly) {
// Red for domains with only expired/invalid certificates
processed.color = { background: '#ff6b6b', border: '#cc5555' };
} else if (!certInfo.hasCertificates) {
// Grey for domains with no certificates
processed.color = { background: '#c7c7c7', border: '#999999' };
}
// Valid certificates use default green (handled by processNode)
}
return processed;
});
const mergedEdges = {};
graphData.edges.forEach(edge => {
const fromNode = largeEntityMap.has(edge.from) ? largeEntityMap.get(edge.from) : edge.from;
const toNode = largeEntityMap.has(edge.to) ? largeEntityMap.get(edge.to) : edge.to;
const mergeKey = `${fromNode}-${toNode}-${edge.label}`;
if (!mergedEdges[mergeKey]) {
mergedEdges[mergeKey] = {
...edge,
from: fromNode,
to: toNode,
count: 0,
confidence_score: 0
};
}
mergedEdges[mergeKey].count++;
if (edge.confidence_score > mergedEdges[mergeKey].confidence_score) {
mergedEdges[mergeKey].confidence_score = edge.confidence_score;
}
});
const processedEdges = Object.values(mergedEdges).map(edge => {
const processed = this.processEdge(edge);
if (edge.count > 1) {
processed.label = `${edge.label} (${edge.count})`;
}
return processed;
});
const processedEdges = graphData.edges.map(edge => {
let fromNode = nodeMap.get(edge.from);
let toNode = nodeMap.get(edge.to);
let fromId = edge.from;
let toId = edge.to;
if (fromNode && fromNode.metadata && fromNode.metadata.large_entity_id) {
fromId = fromNode.metadata.large_entity_id;
}
if (toNode && toNode.metadata && toNode.metadata.large_entity_id) {
toId = toNode.metadata.large_entity_id;
}
// Avoid self-referencing edges from re-routing
if (fromId === toId) {
return null;
}
const reRoutedEdge = { ...edge, from: fromId, to: toId };
return this.processEdge(reRoutedEdge);
}).filter(Boolean); // Remove nulls from self-referencing edges
// Update datasets with animation
const existingNodeIds = this.nodes.getIds();
const existingEdgeIds = this.edges.getIds();
@@ -433,10 +472,13 @@ class GraphManager {
setTimeout(() => this.highlightNewElements(newNodes, newEdges), 100);
}
if (this.nodes.length <= 10 || existingNodeIds.length === 0) {
if (processedNodes.length <= 10 || existingNodeIds.length === 0) {
setTimeout(() => this.fitView(), 800);
}
console.log(`Graph updated: ${processedNodes.length} nodes, ${processedEdges.length} edges (${newNodes.length} new nodes, ${newEdges.length} new edges)`);
console.log(`Large entity members hidden: ${this.largeEntityMembers.size}`);
} catch (error) {
console.error('Failed to update graph:', error);
this.showError('Failed to update visualization');
@@ -564,7 +606,7 @@ class GraphManager {
processEdge(edge) {
const confidence = edge.confidence_score || 0;
const processedEdge = {
id: `${edge.from}-${edge.to}-${edge.label}`,
id: `${edge.from}-${edge.to}`,
from: edge.from,
to: edge.to,
label: this.formatEdgeLabel(edge.label, confidence),
@@ -1011,7 +1053,7 @@ class GraphManager {
this.nodes.clear();
this.edges.clear();
this.history = [];
this.largeEntityMembers.clear();
this.largeEntityMembers.clear(); // Clear large entity tracking
this.initialTargetIds.clear();
// Show placeholder
@@ -1169,6 +1211,7 @@ class GraphManager {
const basicStats = {
nodeCount: this.nodes.length,
edgeCount: this.edges.length,
largeEntityMembersHidden: this.largeEntityMembers.size
};
// Add forensic statistics
@@ -1565,43 +1608,14 @@ class GraphManager {
}
/**
* FIXED: Unhide all hidden nodes, excluding large entity members and disconnected nodes.
* This prevents orphaned large entity members from appearing as free-floating nodes.
* Unhide all hidden nodes
*/
unhideAll() {
const allHiddenNodes = this.nodes.get({
filter: (node) => {
// Skip nodes that are part of a large entity
if (node.metadata && node.metadata.large_entity_id) {
return false;
}
// Skip nodes that are not hidden
if (node.hidden !== true) {
return false;
}
// Skip nodes that have no edges (would appear disconnected)
const nodeId = node.id;
const hasIncomingEdges = this.edges.get().some(edge => edge.to === nodeId && !edge.hidden);
const hasOutgoingEdges = this.edges.get().some(edge => edge.from === nodeId && !edge.hidden);
if (!hasIncomingEdges && !hasOutgoingEdges) {
console.log(`Skipping disconnected node ${nodeId} from unhide`);
return false;
}
return true;
}
const allNodes = this.nodes.get({
filter: (node) => node.hidden === true
});
if (allHiddenNodes.length > 0) {
console.log(`Unhiding ${allHiddenNodes.length} nodes with valid connections`);
const updates = allHiddenNodes.map(node => ({ id: node.id, hidden: false }));
this.nodes.update(updates);
} else {
console.log('No eligible nodes to unhide');
}
const updates = allNodes.map(node => ({ id: node.id, hidden: false }));
this.nodes.update(updates);
}
}
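The edge merging added to updateGraph keys parallel edges on (from, to, label), counts duplicates, and keeps the highest confidence score. The same idea sketched in Python, with field names assumed from the JS:

edges = [
    {"from": "le_1", "to": "ca_x", "label": "crtsh_cert_issuer", "confidence_score": 0.8},
    {"from": "le_1", "to": "ca_x", "label": "crtsh_cert_issuer", "confidence_score": 0.9},
]

merged = {}
for e in edges:
    key = (e["from"], e["to"], e["label"])
    m = merged.setdefault(key, {**e, "count": 0, "confidence_score": 0})
    m["count"] += 1
    m["confidence_score"] = max(m["confidence_score"], e["confidence_score"])

for m in merged.values():
    label = f"{m['label']} ({m['count']})" if m["count"] > 1 else m["label"]
    print(label)  # -> crtsh_cert_issuer (2), with confidence 0.9 retained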

View File

@@ -1397,32 +1397,11 @@ class DNSReconApp {
}
/**
* UPDATED: Generate details for standard nodes with organized attribute grouping and data warnings
* UPDATED: Generate details for standard nodes with organized attribute grouping
*/
generateStandardNodeDetails(node) {
let html = '';
// Check for and display a crt.sh data warning if it exists
const crtshWarningAttr = this.findAttributeByName(node.attributes, 'crtsh_data_warning');
if (crtshWarningAttr) {
html += `
<div class="modal-section" style="border-left: 3px solid #ff9900; background: rgba(255, 153, 0, 0.05);">
<details open>
<summary style="color: #ff9900;">
<span>Data Integrity Warning</span>
</summary>
<div class="modal-section-content">
<p class="placeholder-subtext" style="color: #e0e0e0; font-size: 0.8rem; line-height: 1.5;">
${this.escapeHtml(crtshWarningAttr.value)}
<br><br>
This can occur for very large domains (e.g., google.com) where crt.sh may return a limited subset of all available certificates. As a result, the certificate status may not be fully representative.
</p>
</div>
</details>
</div>
`;
}
// Relationships sections
html += this.generateRelationshipsSection(node);
@@ -1440,19 +1419,6 @@ class DNSReconApp {
return html;
}
/**
* Helper method to find an attribute by name in the standardized attributes list
* @param {Array} attributes - List of StandardAttribute objects
* @param {string} name - Attribute name to find
* @returns {Object|null} The attribute object if found, null otherwise
*/
findAttributeByName(attributes, name) {
if (!Array.isArray(attributes)) {
return null;
}
return attributes.find(attr => attr.name === name) || null;
}
generateOrganizedAttributesSection(attributes, nodeType) {
if (!Array.isArray(attributes) || attributes.length === 0) {
return '';
@@ -2031,6 +1997,8 @@ class DNSReconApp {
if (response.success) {
this.showSuccess(response.message);
this.hideModal();
// If the scanner was idle, it's now running. Start polling to see the new node appear.
if (this.scanStatus === 'idle') {
this.startPolling(1000);