Compare commits

..

No commits in common. "36c0bcdc033c1355d4fe82f6ce049633b235efd0" and "571912218e6707cfcb05beea289f136084b1a7b2" have entirely different histories.

15 changed files with 1200 additions and 636 deletions

View File

@ -22,6 +22,7 @@ For power users who require more in-depth information, DNScope can be configured
* **In-Memory Graph Analysis**: Uses NetworkX for efficient relationship mapping.
* **Real-Time Visualization**: The graph updates dynamically as the scan progresses.
* **Forensic Logging**: A complete audit trail of all reconnaissance activities is maintained.
* **Confidence Scoring**: Relationships are weighted based on the reliability of the data source.
* **Session Management**: Supports concurrent user sessions with isolated scanner instances.
* **Extensible Provider Architecture**: Easily add new data sources to expand the tool's capabilities.
* **Web-Based UI**: An intuitive and interactive web interface for managing scans and visualizing results.

1
app.py
View File

@ -332,6 +332,7 @@ def revert_graph_action():
scanner.graph.add_edge(
source_id=edge['from'], target_id=edge['to'],
relationship_type=edge['metadata']['relationship_type'],
confidence_score=edge['metadata']['confidence_score'],
source_provider=edge['metadata']['source_provider'],
raw_data=edge.get('raw_data', {})
)

View File

@ -2,7 +2,7 @@
"""
Graph data model for DNScope using NetworkX.
Manages in-memory graph storage with forensic metadata.
Manages in-memory graph storage with confidence scoring and forensic metadata.
Now fully compatible with the unified ProviderResult data model.
UPDATED: Fixed correlation exclusion keys to match actual attribute names.
UPDATED: Removed export_json() method - now handled by ExportManager.
@ -31,7 +31,7 @@ class NodeType(Enum):
class GraphManager:
"""
Thread-safe graph manager for DNScope infrastructure mapping.
Uses NetworkX for in-memory graph storage.
Uses NetworkX for in-memory graph storage with confidence scoring.
Compatible with unified ProviderResult data model.
"""
@ -83,7 +83,7 @@ class GraphManager:
return is_new_node
def add_edge(self, source_id: str, target_id: str, relationship_type: str,
source_provider: str = "unknown",
confidence_score: float = 0.5, source_provider: str = "unknown",
raw_data: Optional[Dict[str, Any]] = None) -> bool:
"""
UPDATED: Add or update an edge between two nodes with raw relationship labels.
@ -91,13 +91,23 @@ class GraphManager:
if not self.graph.has_node(source_id) or not self.graph.has_node(target_id):
return False
new_confidence = confidence_score
# UPDATED: Use raw relationship type - no formatting
edge_label = relationship_type
if self.graph.has_edge(source_id, target_id):
# If edge exists, update confidence if the new score is higher.
if new_confidence > self.graph.edges[source_id, target_id].get('confidence_score', 0):
self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
self.graph.edges[source_id, target_id]['updated_by'] = source_provider
return False
# Add a new edge with raw attributes
self.graph.add_edge(source_id, target_id,
relationship_type=edge_label,
confidence_score=new_confidence,
source_provider=source_provider,
discovery_timestamp=datetime.now(timezone.utc).isoformat(),
raw_data=raw_data or {})
@ -127,6 +137,11 @@ class GraphManager:
"""Get all nodes of a specific type."""
return [n for n, d in self.graph.nodes(data=True) if d.get('type') == node_type.value]
def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
    """
    Collect every edge whose confidence score reaches the given threshold.

    Args:
        min_confidence: Lowest score (inclusive) an edge may carry to be returned.

    Returns:
        List of (source, target, attributes) tuples in graph iteration order.
    """
    selected = []
    for source, target, attrs in self.graph.edges(data=True):
        # Edges that carry no stored score are treated as confidence 0.
        score = attrs.get('confidence_score', 0)
        if score >= min_confidence:
            selected.append((source, target, attrs))
    return selected
def get_graph_data(self) -> Dict[str, Any]:
"""
Export graph data formatted for frontend visualization.
@ -162,9 +177,9 @@ class GraphManager:
'from': source,
'to': target,
'label': attrs.get('relationship_type', ''),
'confidence_score': attrs.get('confidence_score', 0),
'source_provider': attrs.get('source_provider', ''),
'discovery_timestamp': attrs.get('discovery_timestamp'),
'raw_data': attrs.get('raw_data', {})
'discovery_timestamp': attrs.get('discovery_timestamp')
})
return {
@ -173,6 +188,24 @@ class GraphManager:
'statistics': self.get_statistics()['basic_metrics']
}
def _get_confidence_distribution(self) -> Dict[str, int]:
"""Get distribution of edge confidence scores with empty graph handling."""
distribution = {'high': 0, 'medium': 0, 'low': 0}
# FIXED: Handle empty graph case
if self.get_edge_count() == 0:
return distribution
for _, _, data in self.graph.edges(data=True):
confidence = data.get('confidence_score', 0)
if confidence >= 0.8:
distribution['high'] += 1
elif confidence >= 0.6:
distribution['medium'] += 1
else:
distribution['low'] += 1
return distribution
def get_statistics(self) -> Dict[str, Any]:
"""Get comprehensive statistics about the graph with proper empty graph handling."""
@ -189,6 +222,7 @@ class GraphManager:
},
'node_type_distribution': {},
'relationship_type_distribution': {},
'confidence_distribution': self._get_confidence_distribution(),
'provider_distribution': {}
}

View File

@ -30,6 +30,7 @@ class RelationshipDiscovery:
source_node: str
target_node: str
relationship_type: str
confidence_score: float
provider: str
raw_data: Dict[str, Any]
discovery_method: str
@ -156,7 +157,7 @@ class ForensicLogger:
self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}")
def log_relationship_discovery(self, source_node: str, target_node: str,
relationship_type: str,
relationship_type: str, confidence_score: float,
provider: str, raw_data: Dict[str, Any],
discovery_method: str) -> None:
"""
@ -166,6 +167,7 @@ class ForensicLogger:
source_node: Source node identifier
target_node: Target node identifier
relationship_type: Type of relationship (e.g., 'SAN', 'A_Record')
confidence_score: Confidence score (0.0 to 1.0)
provider: Provider that discovered this relationship
raw_data: Raw data from provider response
discovery_method: Method used to discover relationship
@ -175,6 +177,7 @@ class ForensicLogger:
source_node=source_node,
target_node=target_node,
relationship_type=relationship_type,
confidence_score=confidence_score,
provider=provider,
raw_data=raw_data,
discovery_method=discovery_method
@ -185,7 +188,7 @@ class ForensicLogger:
self.logger.info(
f"Relationship Discovered - {source_node} -> {target_node} "
f"({relationship_type}) - Provider: {provider}"
f"({relationship_type}) - Confidence: {confidence_score:.2f} - Provider: {provider}"
)
def log_scan_start(self, target_domain: str, recursion_depth: int,
@ -235,6 +238,7 @@ class ForensicLogger:
'successful_requests': len([req for req in provider_requests if req.error is None]),
'failed_requests': len([req for req in provider_requests if req.error is not None]),
'relationships_discovered': len(provider_relationships),
'avg_confidence': sum(rel.confidence_score for rel in provider_relationships) / len(provider_relationships) if provider_relationships else 0
}
return {

View File

@ -18,19 +18,33 @@ class StandardAttribute:
value: Any
type: str
provider: str
confidence: float
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
metadata: Optional[Dict[str, Any]] = field(default_factory=dict)
def __post_init__(self):
"""Validate the attribute after initialization."""
if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")
@dataclass
class Relationship:
    """A unified data structure for a directional link between two nodes."""

    # Node the link starts from.
    source_node: str
    # Node the link points to.
    target_node: str
    # Raw relationship label.
    relationship_type: str
    # Score validated in __post_init__ to be numeric and within 0.0..1.0.
    confidence: float
    # Name of the provider that reported the link.
    provider: str
    # Creation time; defaults to the current UTC time.
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    # Provider payload; defaults to a fresh empty dict per instance.
    raw_data: Optional[Dict[str, Any]] = field(default_factory=dict)

    def __post_init__(self):
        """
        Validate the relationship after initialization.

        Raises:
            ValueError: If ``confidence`` is not an int/float or lies outside
                the closed range 0.0 to 1.0.
        """
        score = self.confidence
        # The type check comes first so non-numeric values never reach the range comparison.
        if isinstance(score, (int, float)) and 0.0 <= score <= 1.0:
            return
        raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")
@dataclass
class ProviderResult:
"""A container for all data returned by a provider from a single query."""
@ -38,7 +52,8 @@ class ProviderResult:
relationships: List[Relationship] = field(default_factory=list)
def add_attribute(self, target_node: str, name: str, value: Any, attr_type: str,
provider: str, metadata: Optional[Dict[str, Any]] = None) -> None:
provider: str, confidence: float = 0.8,
metadata: Optional[Dict[str, Any]] = None) -> None:
"""Helper method to add an attribute to the result."""
self.attributes.append(StandardAttribute(
target_node=target_node,
@ -46,16 +61,19 @@ class ProviderResult:
value=value,
type=attr_type,
provider=provider,
confidence=confidence,
metadata=metadata or {}
))
def add_relationship(self, source_node: str, target_node: str, relationship_type: str,
provider: str, raw_data: Optional[Dict[str, Any]] = None) -> None:
provider: str, confidence: float = 0.8,
raw_data: Optional[Dict[str, Any]] = None) -> None:
"""Helper method to add a relationship to the result."""
self.relationships.append(Relationship(
source_node=source_node,
target_node=target_node,
relationship_type=relationship_type,
confidence=confidence,
provider=provider,
raw_data=raw_data or {}
))

View File

@ -586,7 +586,6 @@ class Scanner:
if self.status in [ScanStatus.FINALIZING, ScanStatus.COMPLETED, ScanStatus.STOPPED]:
print(f"\n=== PHASE 2: Running correlation analysis ===")
self._run_correlation_phase(max_depth, processed_tasks)
self._update_session_state()
# Determine the final status *after* finalization.
if self._is_stop_requested():
@ -848,6 +847,7 @@ class Scanner:
'source_node': rel.source_node,
'target_node': rel.target_node,
'relationship_type': rel.relationship_type,
'confidence': rel.confidence,
'provider': rel.provider,
'raw_data': rel.raw_data
})
@ -905,6 +905,7 @@ class Scanner:
source_id=rel_data['source_node'],
target_id=rel_data['target_node'],
relationship_type=rel_data['relationship_type'],
confidence_score=rel_data['confidence'],
source_provider=rel_data['provider'],
raw_data=rel_data['raw_data']
)
@ -930,7 +931,7 @@ class Scanner:
# Re-enqueue the node for full processing
is_ip = _is_valid_ip(node_id)
eligible_providers = self._get_eligible_providers(node_id, is_ip, False, is_extracted=True)
eligible_providers = self._get_eligible_providers(node_id, is_ip, False)
for provider in eligible_providers:
provider_name = provider.get_name()
priority = self._get_priority(provider_name)
@ -1011,6 +1012,7 @@ class Scanner:
self.graph.add_edge(
visual_source, visual_target,
relationship.relationship_type,
relationship.confidence,
provider_name,
relationship.raw_data
)
@ -1033,7 +1035,7 @@ class Scanner:
for attribute in provider_result.attributes:
attr_dict = {
"name": attribute.name, "value": attribute.value, "type": attribute.type,
"provider": attribute.provider, "metadata": attribute.metadata
"provider": attribute.provider, "confidence": attribute.confidence, "metadata": attribute.metadata
}
attributes_by_node[attribute.target_node].append(attr_dict)
@ -1134,7 +1136,7 @@ class Scanner:
self.logger.logger.warning(f"Error initializing provider states for {target}: {e}")
def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool, is_extracted: bool = False) -> List:
def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool) -> List:
"""
FIXED: Improved provider eligibility checking with better filtering.
"""
@ -1146,7 +1148,7 @@ class Scanner:
# Check if the target is part of a large entity
is_in_large_entity = False
if self.graph.graph.has_node(target) and not is_extracted:
if self.graph.graph.has_node(target):
metadata = self.graph.graph.nodes[target].get('metadata', {})
if 'large_entity_id' in metadata:
is_in_large_entity = True

View File

@ -229,6 +229,7 @@ class BaseProvider(ABC):
def log_relationship_discovery(self, source_node: str, target_node: str,
relationship_type: str,
confidence_score: float,
raw_data: Dict[str, Any],
discovery_method: str) -> None:
"""
@ -238,6 +239,7 @@ class BaseProvider(ABC):
source_node: Source node identifier
target_node: Target node identifier
relationship_type: Type of relationship
confidence_score: Confidence score
raw_data: Raw data from provider
discovery_method: Method used for discovery
"""
@ -247,6 +249,7 @@ class BaseProvider(ABC):
source_node=source_node,
target_node=target_node,
relationship_type=relationship_type,
confidence_score=confidence_score,
provider=self.name,
raw_data=raw_data,
discovery_method=discovery_method

View File

@ -1,8 +1,7 @@
# dnsrecon-reduced/providers/correlation_provider.py
# DNScope/providers/correlation_provider.py
import re
from typing import Dict, Any, List
from datetime import datetime, timezone
from .base_provider import BaseProvider
from core.provider_result import ProviderResult
@ -11,7 +10,6 @@ from core.graph_manager import NodeType, GraphManager
class CorrelationProvider(BaseProvider):
"""
A provider that finds correlations between nodes in the graph.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
"""
def __init__(self, name: str = "correlation", session_config=None):
@ -24,10 +22,6 @@ class CorrelationProvider(BaseProvider):
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
self.EXCLUDED_KEYS = [
'cert_source',
'a_records',
'mx_records',
'ns_records',
'ptr_records',
'cert_issuer_ca_id',
'cert_common_name',
'cert_validity_period_days',
@ -42,8 +36,6 @@ class CorrelationProvider(BaseProvider):
'updated_timestamp',
'discovery_timestamp',
'query_timestamp',
'shodan_ip_str',
'shodan_a_record',
]
def get_name(self) -> str:
@ -69,14 +61,12 @@ class CorrelationProvider(BaseProvider):
def query_domain(self, domain: str) -> ProviderResult:
"""
Query the provider for information about a domain.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
"""
return self._find_correlations(domain)
def query_ip(self, ip: str) -> ProviderResult:
"""
Query the provider for information about an IP address.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
"""
return self._find_correlations(ip)
@ -89,10 +79,8 @@ class CorrelationProvider(BaseProvider):
def _find_correlations(self, node_id: str) -> ProviderResult:
"""
Find correlations for a given node with enhanced filtering and error handling.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring and list value processing.
"""
result = ProviderResult()
discovery_time = datetime.now(timezone.utc)
# Enhanced safety checks
if not self.graph or not self.graph.graph.has_node(node_id):
@ -115,28 +103,20 @@ class CorrelationProvider(BaseProvider):
attr_value = attr.get('value')
attr_provider = attr.get('provider', 'unknown')
# Prepare a list of values to iterate over
values_to_process = []
if isinstance(attr_value, list):
values_to_process.extend(attr_value)
else:
values_to_process.append(attr_value)
for value_item in values_to_process:
# Enhanced filtering logic
should_exclude = self._should_exclude_attribute(attr_name, value_item)
should_exclude = self._should_exclude_attribute(attr_name, attr_value)
if should_exclude:
continue
# Build correlation index
if value_item not in self.correlation_index:
self.correlation_index[value_item] = {
if attr_value not in self.correlation_index:
self.correlation_index[attr_value] = {
'nodes': set(),
'sources': []
}
self.correlation_index[value_item]['nodes'].add(node_id)
self.correlation_index[attr_value]['nodes'].add(node_id)
source_info = {
'node_id': node_id,
@ -146,14 +126,14 @@ class CorrelationProvider(BaseProvider):
}
# Avoid duplicate sources
existing_sources = [s for s in self.correlation_index[value_item]['sources']
existing_sources = [s for s in self.correlation_index[attr_value]['sources']
if s['node_id'] == node_id and s['path'] == source_info['path']]
if not existing_sources:
self.correlation_index[value_item]['sources'].append(source_info)
self.correlation_index[attr_value]['sources'].append(source_info)
# Create correlation if we have multiple nodes with this value
if len(self.correlation_index[value_item]['nodes']) > 1:
self._create_correlation_relationships(value_item, self.correlation_index[value_item], result, discovery_time)
if len(self.correlation_index[attr_value]['nodes']) > 1:
self._create_correlation_relationships(attr_value, self.correlation_index[attr_value], result)
correlations_found += 1
# Log correlation results
@ -207,11 +187,9 @@ class CorrelationProvider(BaseProvider):
return False
def _create_correlation_relationships(self, value: Any, correlation_data: Dict[str, Any],
result: ProviderResult, discovery_time: datetime):
def _create_correlation_relationships(self, value: Any, correlation_data: Dict[str, Any], result: ProviderResult):
"""
Create correlation relationships with enhanced deduplication and validation.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
"""
correlation_node_id = f"corr_{hash(str(value)) & 0x7FFFFFFF}"
nodes = correlation_data['nodes']
@ -238,6 +216,7 @@ class CorrelationProvider(BaseProvider):
value=value,
attr_type=str(type(value).__name__),
provider=self.name,
confidence=0.9,
metadata={
'correlated_nodes': list(nodes),
'sources': sources,
@ -246,7 +225,7 @@ class CorrelationProvider(BaseProvider):
}
)
# Create relationships with source validation and enhanced timestamps
# Create relationships with source validation
created_relationships = set()
for source in sources:
@ -261,23 +240,19 @@ class CorrelationProvider(BaseProvider):
relationship_label = f"corr_{provider}_{attribute}"
# Enhanced raw_data with discovery timestamp for time-based edge coloring
raw_data = {
'correlation_value': value,
'original_attribute': attribute,
'correlation_type': 'attribute_matching',
'correlation_size': len(nodes),
'discovery_timestamp': discovery_time.isoformat(),
'relevance_timestamp': discovery_time.isoformat() # Correlation data is "fresh" when discovered
}
# Add the relationship to the result
result.add_relationship(
source_node=node_id,
target_node=correlation_node_id,
relationship_type=relationship_label,
provider=self.name,
raw_data=raw_data
confidence=0.9,
raw_data={
'correlation_value': value,
'original_attribute': attribute,
'correlation_type': 'attribute_matching',
'correlation_size': len(nodes)
}
)
created_relationships.add(relationship_key)

View File

@ -18,7 +18,6 @@ class CrtShProvider(BaseProvider):
Provider for querying crt.sh certificate transparency database.
FIXED: Improved caching logic and error handling to prevent infinite retry loops.
Returns standardized ProviderResult objects with caching support.
UPDATED: Enhanced with certificate timestamps for time-based edge coloring.
"""
def __init__(self, name=None, session_config=None):
@ -132,7 +131,6 @@ class CrtShProvider(BaseProvider):
def query_domain(self, domain: str) -> ProviderResult:
"""
FIXED: Simplified and more robust domain querying with better error handling.
UPDATED: Enhanced with certificate timestamps for time-based edge coloring.
"""
if not _is_valid_domain(domain):
return ProviderResult()
@ -247,6 +245,7 @@ class CrtShProvider(BaseProvider):
target_node=rel_data.get("target_node", ""),
relationship_type=rel_data.get("relationship_type", ""),
provider=rel_data.get("provider", self.name),
confidence=float(rel_data.get("confidence", 0.8)),
raw_data=rel_data.get("raw_data", {})
)
except (ValueError, TypeError) as e:
@ -266,6 +265,7 @@ class CrtShProvider(BaseProvider):
value=attr_data.get("value"),
attr_type=attr_data.get("type", "unknown"),
provider=attr_data.get("provider", self.name),
confidence=float(attr_data.get("confidence", 0.9)),
metadata=attr_data.get("metadata", {})
)
except (ValueError, TypeError) as e:
@ -293,6 +293,7 @@ class CrtShProvider(BaseProvider):
"source_node": rel.source_node,
"target_node": rel.target_node,
"relationship_type": rel.relationship_type,
"confidence": rel.confidence,
"provider": rel.provider,
"raw_data": rel.raw_data
} for rel in result.relationships
@ -304,6 +305,7 @@ class CrtShProvider(BaseProvider):
"value": attr.value,
"type": attr.type,
"provider": attr.provider,
"confidence": attr.confidence,
"metadata": attr.metadata
} for attr in result.attributes
]
@ -370,7 +372,6 @@ class CrtShProvider(BaseProvider):
"""
Process certificates to create proper domain and CA nodes.
FIXED: Better error handling and progress tracking.
UPDATED: Enhanced with certificate timestamps for time-based edge coloring.
"""
result = ProviderResult()
@ -390,7 +391,8 @@ class CrtShProvider(BaseProvider):
name="crtsh_data_warning",
value=incompleteness_warning,
attr_type='metadata',
provider=self.name
provider=self.name,
confidence=1.0
)
all_discovered_domains = set()
@ -413,28 +415,16 @@ class CrtShProvider(BaseProvider):
if cert_domains:
all_discovered_domains.update(cert_domains)
# Create CA nodes for certificate issuers with timestamp
# Create CA nodes for certificate issuers
issuer_name = self._parse_issuer_organization(cert_data.get('issuer_name', ''))
if issuer_name and issuer_name not in processed_issuers:
# Enhanced raw_data with certificate timestamp for time-based edge coloring
issuer_raw_data = {'issuer_dn': cert_data.get('issuer_name', '')}
# Add certificate issue date (not_before) as relevance timestamp
not_before = cert_data.get('not_before')
if not_before:
try:
not_before_date = self._parse_certificate_date(not_before)
issuer_raw_data['cert_not_before'] = not_before_date.isoformat()
issuer_raw_data['relevance_timestamp'] = not_before_date.isoformat() # Standardized field
except Exception as e:
self.logger.logger.debug(f"Failed to parse not_before date for issuer: {e}")
result.add_relationship(
source_node=query_domain,
target_node=issuer_name,
relationship_type='crtsh_cert_issuer',
provider=self.name,
raw_data=issuer_raw_data
confidence=0.95,
raw_data={'issuer_dn': cert_data.get('issuer_name', '')}
)
processed_issuers.add(issuer_name)
@ -452,6 +442,7 @@ class CrtShProvider(BaseProvider):
value=value,
attr_type='certificate_data',
provider=self.name,
confidence=0.9,
metadata={'certificate_id': cert_data.get('id')}
)
@ -466,7 +457,7 @@ class CrtShProvider(BaseProvider):
self.logger.logger.info(f"CrtSh query cancelled before relationship creation for domain: {query_domain}")
return result
# Create selective relationships to avoid large entities with enhanced timestamps
# Create selective relationships to avoid large entities
relationships_created = 0
for discovered_domain in all_discovered_domains:
if discovered_domain == query_domain:
@ -476,36 +467,25 @@ class CrtShProvider(BaseProvider):
continue
if self._should_create_relationship(query_domain, discovered_domain):
# Enhanced raw_data with certificate timestamp for domain relationships
domain_raw_data = {'relationship_type': 'certificate_discovery'}
# Find the most recent certificate for this domain pair to use as timestamp
most_recent_cert = self._find_most_recent_cert_for_domains(
certificates, query_domain, discovered_domain
confidence = self._calculate_domain_relationship_confidence(
query_domain, discovered_domain, [], all_discovered_domains
)
if most_recent_cert:
not_before = most_recent_cert.get('not_before')
if not_before:
try:
not_before_date = self._parse_certificate_date(not_before)
domain_raw_data['cert_not_before'] = not_before_date.isoformat()
domain_raw_data['relevance_timestamp'] = not_before_date.isoformat()
except Exception as e:
self.logger.logger.debug(f"Failed to parse not_before date for domain relationship: {e}")
result.add_relationship(
source_node=query_domain,
target_node=discovered_domain,
relationship_type='crtsh_san_certificate',
provider=self.name,
raw_data=domain_raw_data
confidence=confidence,
raw_data={'relationship_type': 'certificate_discovery'}
)
self.log_relationship_discovery(
source_node=query_domain,
target_node=discovered_domain,
relationship_type='crtsh_san_certificate',
raw_data=domain_raw_data,
confidence_score=confidence,
raw_data={'relationship_type': 'certificate_discovery'},
discovery_method="certificate_transparency_analysis"
)
relationships_created += 1
@ -513,31 +493,6 @@ class CrtShProvider(BaseProvider):
self.logger.logger.info(f"CrtSh processing completed for {query_domain}: processed {processed_certs}/{len(certificates)} certificates, {len(all_discovered_domains)} domains, {relationships_created} relationships")
return result
def _find_most_recent_cert_for_domains(self, certificates: List[Dict[str, Any]],
domain1: str, domain2: str) -> Optional[Dict[str, Any]]:
"""
Find the most recent certificate that contains both domains.
Used for determining the relevance timestamp for domain relationships.
"""
most_recent_cert = None
most_recent_date = None
for cert in certificates:
# Check if this certificate contains both domains
cert_domains = self._extract_domains_from_certificate(cert)
if domain1 in cert_domains and domain2 in cert_domains:
not_before = cert.get('not_before')
if not_before:
try:
cert_date = self._parse_certificate_date(not_before)
if most_recent_date is None or cert_date > most_recent_date:
most_recent_date = cert_date
most_recent_cert = cert
except Exception:
continue
return most_recent_cert
# [Rest of the methods remain the same as in the original file]
def _should_create_relationship(self, source_domain: str, target_domain: str) -> bool:
"""
@ -709,6 +664,25 @@ class CrtShProvider(BaseProvider):
return [d for d in final_domains if _is_valid_domain(d)]
def _calculate_domain_relationship_confidence(self, domain1: str, domain2: str,
shared_certificates: List[Dict[str, Any]],
all_discovered_domains: Set[str]) -> float:
"""Calculate confidence score for domain relationship based on various factors."""
base_confidence = 0.9
relationship_context = self._determine_relationship_context(domain2, domain1)
if relationship_context == 'exact_match':
context_bonus = 0.0
elif relationship_context == 'subdomain':
context_bonus = 0.1
elif relationship_context == 'parent_domain':
context_bonus = 0.05
else:
context_bonus = 0.0
final_confidence = base_confidence + context_bonus
return max(0.1, min(1.0, final_confidence))
def _determine_relationship_context(self, cert_domain: str, query_domain: str) -> str:
"""Determine the context of the relationship between certificate domain and query domain."""

View File

@ -2,7 +2,6 @@
from dns import resolver, reversename
from typing import Dict
from datetime import datetime, timezone
from .base_provider import BaseProvider
from core.provider_result import ProviderResult
from utils.helpers import _is_valid_ip, _is_valid_domain, get_ip_version
@ -12,7 +11,6 @@ class DNSProvider(BaseProvider):
"""
Provider for standard DNS resolution and reverse DNS lookups.
Now returns standardized ProviderResult objects with IPv4 and IPv6 support.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
"""
def __init__(self, name=None, session_config=None):
@ -53,7 +51,6 @@ class DNSProvider(BaseProvider):
"""
Query DNS records for the domain to discover relationships and attributes.
FIXED: Now creates separate attributes for each DNS record type.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
Args:
domain: Domain to investigate
@ -65,12 +62,11 @@ class DNSProvider(BaseProvider):
return ProviderResult()
result = ProviderResult()
discovery_time = datetime.now(timezone.utc)
# Query all record types - each gets its own attribute
for record_type in ['A', 'AAAA', 'CNAME', 'MX', 'NS', 'SOA', 'TXT', 'SRV', 'CAA']:
try:
self._query_record(domain, record_type, result, discovery_time)
self._query_record(domain, record_type, result)
#except resolver.NoAnswer:
# This is not an error, just a confirmation that the record doesn't exist.
#self.logger.logger.debug(f"No {record_type} record found for {domain}")
@ -83,7 +79,6 @@ class DNSProvider(BaseProvider):
def query_ip(self, ip: str) -> ProviderResult:
"""
Query reverse DNS for the IP address (supports both IPv4 and IPv6).
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
Args:
ip: IP address to investigate (IPv4 or IPv6)
@ -96,7 +91,6 @@ class DNSProvider(BaseProvider):
result = ProviderResult()
ip_version = get_ip_version(ip)
discovery_time = datetime.now(timezone.utc)
try:
# Perform reverse DNS lookup (works for both IPv4 and IPv6)
@ -118,24 +112,20 @@ class DNSProvider(BaseProvider):
relationship_type = 'dns_a_record'
record_prefix = 'A'
# Enhanced raw_data with discovery timestamp for time-based edge coloring
raw_data = {
'query_type': 'PTR',
'ip_address': ip,
'ip_version': ip_version,
'hostname': hostname,
'ttl': response.ttl,
'discovery_timestamp': discovery_time.isoformat(),
'relevance_timestamp': discovery_time.isoformat() # DNS data is "fresh" when discovered
}
# Add the relationship
result.add_relationship(
source_node=ip,
target_node=hostname,
relationship_type='dns_ptr_record',
provider=self.name,
raw_data=raw_data
confidence=0.8,
raw_data={
'query_type': 'PTR',
'ip_address': ip,
'ip_version': ip_version,
'hostname': hostname,
'ttl': response.ttl
}
)
# Add to PTR records list
@ -146,7 +136,14 @@ class DNSProvider(BaseProvider):
source_node=ip,
target_node=hostname,
relationship_type='dns_ptr_record',
raw_data=raw_data,
confidence_score=0.8,
raw_data={
'query_type': 'PTR',
'ip_address': ip,
'ip_version': ip_version,
'hostname': hostname,
'ttl': response.ttl
},
discovery_method=f"reverse_dns_lookup_ipv{ip_version}"
)
@ -158,6 +155,7 @@ class DNSProvider(BaseProvider):
value=ptr_records,
attr_type='dns_record',
provider=self.name,
confidence=0.8,
metadata={'ttl': response.ttl, 'ip_version': ip_version}
)
@ -172,11 +170,10 @@ class DNSProvider(BaseProvider):
return result
def _query_record(self, domain: str, record_type: str, result: ProviderResult, discovery_time: datetime) -> None:
def _query_record(self, domain: str, record_type: str, result: ProviderResult) -> None:
"""
FIXED: Query DNS records with unique attribute names for each record type.
Enhanced to better handle IPv6 AAAA records.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
"""
try:
self.total_requests += 1
@ -220,20 +217,18 @@ class DNSProvider(BaseProvider):
if record_type in ['A', 'AAAA'] and _is_valid_ip(target):
ip_version = get_ip_version(target)
# Enhanced raw_data with discovery timestamp for time-based edge coloring
raw_data = {
'query_type': record_type,
'domain': domain,
'value': target,
'ttl': response.ttl,
'discovery_timestamp': discovery_time.isoformat(),
'relevance_timestamp': discovery_time.isoformat() # DNS data is "fresh" when discovered
'ttl': response.ttl
}
if ip_version:
raw_data['ip_version'] = ip_version
relationship_type = f"dns_{record_type.lower()}_record"
confidence = 0.8
# Add relationship
result.add_relationship(
@ -241,6 +236,7 @@ class DNSProvider(BaseProvider):
target_node=target,
relationship_type=relationship_type,
provider=self.name,
confidence=confidence,
raw_data=raw_data
)
@ -256,6 +252,7 @@ class DNSProvider(BaseProvider):
source_node=domain,
target_node=target,
relationship_type=relationship_type,
confidence_score=confidence,
raw_data=raw_data,
discovery_method=discovery_method
)
@ -279,6 +276,7 @@ class DNSProvider(BaseProvider):
value=dns_records,
attr_type='dns_record_list',
provider=self.name,
confidence=0.8,
metadata=metadata
)

View File

@ -15,7 +15,6 @@ class ShodanProvider(BaseProvider):
"""
Provider for querying Shodan API for IP address information.
Now returns standardized ProviderResult objects with caching support for IPv4 and IPv6.
UPDATED: Enhanced with last_seen timestamp for time-based edge coloring.
"""
def __init__(self, name=None, session_config=None):
@ -146,7 +145,6 @@ class ShodanProvider(BaseProvider):
"""
Query Shodan for information about an IP address (IPv4 or IPv6), with caching of processed data.
FIXED: Proper 404 handling to prevent unnecessary retries.
UPDATED: Enhanced with last_seen timestamp extraction for time-based edge coloring.
Args:
ip: IP address to investigate (IPv4 or IPv6)
@ -306,6 +304,7 @@ class ShodanProvider(BaseProvider):
target_node=rel_data["target_node"],
relationship_type=rel_data["relationship_type"],
provider=rel_data["provider"],
confidence=rel_data["confidence"],
raw_data=rel_data.get("raw_data", {})
)
@ -317,6 +316,7 @@ class ShodanProvider(BaseProvider):
value=attr_data["value"],
attr_type=attr_data["type"],
provider=attr_data["provider"],
confidence=attr_data["confidence"],
metadata=attr_data.get("metadata", {})
)
@ -336,6 +336,7 @@ class ShodanProvider(BaseProvider):
"source_node": rel.source_node,
"target_node": rel.target_node,
"relationship_type": rel.relationship_type,
"confidence": rel.confidence,
"provider": rel.provider,
"raw_data": rel.raw_data
} for rel in result.relationships
@ -347,6 +348,7 @@ class ShodanProvider(BaseProvider):
"value": attr.value,
"type": attr.type,
"provider": attr.provider,
"confidence": attr.confidence,
"metadata": attr.metadata
} for attr in result.attributes
]
@ -360,40 +362,25 @@ class ShodanProvider(BaseProvider):
"""
VERIFIED: Process Shodan data creating ISP nodes with ASN attributes and proper relationships.
Enhanced to include IP version information for IPv6 addresses.
UPDATED: Enhanced with last_seen timestamp for time-based edge coloring.
"""
result = ProviderResult()
# Determine IP version for metadata
ip_version = get_ip_version(ip)
# Extract last_seen timestamp for time-based edge coloring
last_seen = data.get('last_seen')
# VERIFIED: Extract ISP information and create proper ISP node with ASN
isp_name = data.get('org')
asn_value = data.get('asn')
if isp_name and asn_value:
# Enhanced raw_data with last_seen timestamp
raw_data = {
'asn': asn_value,
'shodan_org': isp_name,
'ip_version': ip_version
}
# Add last_seen timestamp if available
if last_seen:
raw_data['last_seen'] = last_seen
raw_data['relevance_timestamp'] = last_seen # Standardized field for time-based coloring
# Create relationship from IP to ISP
result.add_relationship(
source_node=ip,
target_node=isp_name,
relationship_type='shodan_isp',
provider=self.name,
raw_data=raw_data
confidence=0.9,
raw_data={'asn': asn_value, 'shodan_org': isp_name, 'ip_version': ip_version}
)
# Add ASN as attribute to the ISP node
@ -403,6 +390,7 @@ class ShodanProvider(BaseProvider):
value=asn_value,
attr_type='isp_info',
provider=self.name,
confidence=0.9,
metadata={'description': 'Autonomous System Number from Shodan', 'ip_version': ip_version}
)
@ -413,6 +401,7 @@ class ShodanProvider(BaseProvider):
value=isp_name,
attr_type='isp_info',
provider=self.name,
confidence=0.9,
metadata={'description': 'Organization name from Shodan', 'ip_version': ip_version}
)
@ -427,24 +416,20 @@ class ShodanProvider(BaseProvider):
else:
relationship_type = 'shodan_a_record'
# Enhanced raw_data with last_seen timestamp
hostname_raw_data = {**data, 'ip_version': ip_version}
if last_seen:
hostname_raw_data['last_seen'] = last_seen
hostname_raw_data['relevance_timestamp'] = last_seen
result.add_relationship(
source_node=ip,
target_node=hostname,
relationship_type=relationship_type,
provider=self.name,
raw_data=hostname_raw_data
confidence=0.8,
raw_data={**data, 'ip_version': ip_version}
)
self.log_relationship_discovery(
source_node=ip,
target_node=hostname,
relationship_type=relationship_type,
raw_data=hostname_raw_data,
confidence_score=0.8,
raw_data={**data, 'ip_version': ip_version},
discovery_method=f"shodan_host_lookup_ipv{ip_version}"
)
elif key == 'ports':
@ -456,6 +441,7 @@ class ShodanProvider(BaseProvider):
value=port,
attr_type='shodan_network_info',
provider=self.name,
confidence=0.9,
metadata={'ip_version': ip_version}
)
elif isinstance(value, (str, int, float, bool)) and value is not None:
@ -466,6 +452,7 @@ class ShodanProvider(BaseProvider):
value=value,
attr_type='shodan_info',
provider=self.name,
confidence=0.9,
metadata={'ip_version': ip_version}
)

View File

@ -326,20 +326,6 @@ input[type="text"]:focus, select:focus {
animation: progressGlow 2s ease-in-out infinite alternate;
}
.gradient-bar {
height: 4px;
background: linear-gradient(to right, #6b7280, #00bfff);
border-radius: 2px;
margin: 0.2rem 0;
}
.gradient-labels {
display: flex;
justify-content: space-between;
font-size: 0.6rem;
color: #888;
}
@keyframes progressShimmer {
0% { transform: translateX(-100%); }
100% { transform: translateX(100%); }
@ -394,59 +380,32 @@ input[type="text"]:focus, select:focus {
color: #999;
}
/* Enhanced graph controls layout */
/* Graph Controls */
.graph-controls {
display: flex;
flex-direction: column;
gap: 0.3rem;
position: absolute;
top: 10px;
right: 10px;
background: rgba(26, 26, 26, 0.9);
padding: 0.5rem;
border-radius: 6px;
border: 1px solid #444;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.5);
z-index: 100;
min-width: 200px;
top: 8px;
right: 8px;
z-index: 10;
display: flex;
gap: 0.3rem;
}
.graph-control-btn {
background: linear-gradient(135deg, #2a2a2a 0%, #1e1e1e 100%);
.graph-control-btn, .btn-icon-small {
background: rgba(42, 42, 42, 0.9);
border: 1px solid #555;
color: #c7c7c7;
padding: 0.4rem 0.8rem;
border-radius: 4px;
cursor: pointer;
padding: 0.3rem 0.5rem;
font-family: 'Roboto Mono', monospace;
font-size: 0.8rem;
transition: all 0.2s ease;
text-align: center;
font-size: 0.7rem;
cursor: pointer;
transition: all 0.3s ease;
}
.graph-control-btn:hover {
background: linear-gradient(135deg, #3a3a3a 0%, #2e2e2e 100%);
.graph-control-btn:hover, .btn-icon-small:hover {
border-color: #00ff41;
color: #00ff41;
}
.graph-control-btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.manual-refresh-btn {
background: linear-gradient(135deg, #4a4a2a 0%, #3e3e1e 100%);
border-color: #ffaa00;
color: #ffaa00;
}
.manual-refresh-btn:hover {
background: linear-gradient(135deg, #5a5a3a 0%, #4e4e2e 100%);
color: #ffcc33;
border-color: #ffcc33;
}
.graph-filter-panel {
position: absolute;
bottom: 8px;
@ -541,6 +500,14 @@ input[type="text"]:focus, select:focus {
height: 2px;
}
.legend-edge.high-confidence {
background: #00ff41;
}
.legend-edge.medium-confidence {
background: #ff9900;
}
/* Provider Panel */
.provider-panel {
grid-area: providers;
@ -1020,6 +987,11 @@ input[type="text"]:focus, select:focus {
border-radius: 2px;
}
.confidence-indicator {
font-size: 0.6rem;
letter-spacing: 1px;
}
.node-link-compact {
color: #00aaff;
text-decoration: none;
@ -1123,56 +1095,6 @@ input[type="text"]:focus, select:focus {
border-left: 3px solid #00aaff;
}
.time-control-container {
margin-bottom: 0.5rem;
padding: 0.5rem;
background: rgba(42, 42, 42, 0.3);
border-radius: 4px;
border: 1px solid #444;
}
.time-control-label {
font-size: 0.8rem;
color: #c7c7c7;
margin-bottom: 0.3rem;
display: block;
font-family: 'Roboto Mono', monospace;
}
.time-control-input {
width: 100%;
padding: 0.3rem;
background: #1a1a1a;
border: 1px solid #555;
border-radius: 3px;
color: #c7c7c7;
font-family: 'Roboto Mono', monospace;
font-size: 0.75rem;
}
.time-control-input:focus {
outline: none;
border-color: #00ff41;
box-shadow: 0 0 5px rgba(0, 255, 65, 0.3);
}
.time-gradient-info {
font-size: 0.7rem;
color: #999;
margin-top: 0.3rem;
text-align: center;
font-family: 'Roboto Mono', monospace;
}
/* Edge color legend for time-based gradient */
.time-gradient-legend {
margin-top: 0.5rem;
padding: 0.3rem;
background: rgba(26, 26, 26, 0.5);
border-radius: 3px;
border: 1px solid #333;
}
/* Settings Modal Specific */
.provider-toggle {
appearance: none !important;
@ -1402,16 +1324,16 @@ input[type="password"]:focus {
.provider-list {
grid-template-columns: 1fr;
}
.graph-controls {
position: relative;
top: auto;
right: auto;
margin-bottom: 1rem;
min-width: auto;
}
.time-control-input {
font-size: 0.7rem;
.manual-refresh-btn {
background: rgba(92, 76, 44, 0.9) !important; /* Orange/amber background */
border: 1px solid #7a6a3a !important;
color: #ffcc00 !important; /* Bright yellow text */
}
.manual-refresh-btn:hover {
border-color: #ffcc00 !important;
color: #fff !important;
background: rgba(112, 96, 54, 0.9) !important;
}

File diff suppressed because it is too large Load Diff

View File

@ -224,6 +224,12 @@ class DNScopeApp {
if (e.target === this.elements.settingsModal) this.hideSettingsModal();
});
}
if (this.elements.saveApiKeys) {
this.elements.saveApiKeys.removeEventListener('click', this.saveApiKeys);
}
if (this.elements.resetApiKeys) {
this.elements.resetApiKeys.removeEventListener('click', this.resetApiKeys);
}
// Setup new handlers
const saveSettingsBtn = document.getElementById('save-settings');
@ -849,7 +855,7 @@ class DNScopeApp {
// Do final graph update when scan completes
console.log('Scan completed - performing final graph update');
setTimeout(() => this.updateGraph(), 1000);
setTimeout(() => this.updateGraph(), 100);
break;
case 'failed':
@ -1716,9 +1722,17 @@ class DNScopeApp {
return groups;
}
formatEdgeLabel(relationshipType, confidence) {
if (!relationshipType) return '';
const confidenceText = confidence >= 0.8 ? '●' : confidence >= 0.6 ? '◐' : '○';
return `${relationshipType} ${confidenceText}`;
}
createEdgeTooltip(edge) {
let tooltip = `<div style="font-family: 'Roboto Mono', monospace; font-size: 11px;">`;
tooltip += `<div style="color: #00ff41; font-weight: bold; margin-bottom: 4px;">${edge.label || 'Relationship'}</div>`;
tooltip += `<div style="color: #999; margin-bottom: 2px;">Confidence: ${(edge.confidence_score * 100).toFixed(1)}%</div>`;
// UPDATED: Use raw provider name (no formatting)
if (edge.source_provider) {
@ -1858,7 +1872,7 @@ class DNScopeApp {
html += `
<div class="relationship-compact-item">
<span class="node-link-compact" data-node-id="${innerNodeId}">${innerNodeId}</span>
<button class="graph-control-btn extract-node-btn"
<button class="btn-icon-small extract-node-btn"
title="Extract to graph"
data-large-entity-id="${largeEntityId}"
data-node-id="${innerNodeId}">[+]</button>
@ -1885,6 +1899,8 @@ class DNScopeApp {
`;
node.incoming_edges.forEach(edge => {
const confidence = edge.data.confidence_score || 0;
const confidenceClass = confidence >= 0.8 ? 'high' : confidence >= 0.6 ? 'medium' : 'low';
html += `
<div class="relationship-item">
@ -1893,6 +1909,9 @@ class DNScopeApp {
</div>
<div class="relationship-type">
<span class="relation-label">${edge.data.relationship_type}</span>
<span class="confidence-indicator confidence-${confidenceClass}" title="Confidence: ${(confidence * 100).toFixed(1)}%">
${'●'.repeat(Math.ceil(confidence * 3))}
</span>
</div>
</div>
`;
@ -1911,6 +1930,9 @@ class DNScopeApp {
`;
node.outgoing_edges.forEach(edge => {
const confidence = edge.data.confidence_score || 0;
const confidenceClass = confidence >= 0.8 ? 'high' : confidence >= 0.6 ? 'medium' : 'low';
html += `
<div class="relationship-item">
<div class="relationship-target node-link" data-node-id="${edge.to}">
@ -1918,6 +1940,9 @@ class DNScopeApp {
</div>
<div class="relationship-type">
<span class="relation-label">${edge.data.relationship_type}</span>
<span class="confidence-indicator confidence-${confidenceClass}" title="Confidence: ${(confidence * 100).toFixed(1)}%">
${'●'.repeat(Math.ceil(confidence * 3))}
</span>
</div>
</div>
`;
@ -2337,6 +2362,51 @@ class DNScopeApp {
}
}
/**
* Save API Keys
*/
async saveApiKeys() {
const inputs = this.elements.apiKeyInputs.querySelectorAll('input');
const keys = {};
inputs.forEach(input => {
const provider = input.dataset.provider;
const value = input.value.trim();
if (provider && value) {
keys[provider] = value;
}
});
if (Object.keys(keys).length === 0) {
this.showWarning('No API keys were entered.');
return;
}
try {
const response = await this.apiCall('/api/config/api-keys', 'POST', keys);
if (response.success) {
this.showSuccess(response.message);
this.hideSettingsModal();
this.loadProviders(); // Refresh provider status
} else {
throw new Error(response.error || 'Failed to save API keys');
}
} catch (error) {
this.showError(`Error saving API keys: ${error.message}`);
}
}
/**
* Reset API Key fields
*/
resetApiKeys() {
const inputs = this.elements.apiKeyInputs.querySelectorAll('input');
inputs.forEach(input => {
input.value = '';
});
}
/**
* Make API call to server
* @param {string} endpoint - API endpoint

View File

@ -188,6 +188,7 @@ class ExportManager:
f" - Type: {domain_info['classification']}",
f" - Connected IPs: {len(domain_info['ips'])}",
f" - Certificate Status: {domain_info['cert_status']}",
f" - Relationship Confidence: {domain_info['avg_confidence']:.2f}",
])
if domain_info['security_notes']:
@ -246,9 +247,11 @@ class ExportManager:
])
for rel in key_relationships[:8]: # Top 8 relationships
confidence_desc = self._describe_confidence(rel['confidence'])
report.extend([
f"{rel['source']}{rel['target']}",
f" - Relationship: {self._humanize_relationship_type(rel['type'])}",
f" - Evidence Strength: {confidence_desc} ({rel['confidence']:.2f})",
f" - Discovery Method: {rel['provider']}",
""
])
@ -288,15 +291,21 @@ class ExportManager:
"Data Quality Assessment:",
f"• Total API Requests: {audit_trail.get('session_metadata', {}).get('total_requests', 0)}",
f"• Data Providers Used: {len(audit_trail.get('session_metadata', {}).get('providers_used', []))}",
f"• Relationship Confidence Distribution:",
])
correlation_provider = next((p for p in scanner.providers if p.get_name() == 'correlation'), None)
correlation_count = len(correlation_provider.correlation_index) if correlation_provider else 0
# Confidence distribution
confidence_dist = self._calculate_confidence_distribution(edges)
for level, count in confidence_dist.items():
percentage = (count / len(edges) * 100) if edges else 0
report.extend([
f" - {level.title()} Confidence (≥{self._get_confidence_threshold(level)}): {count} ({percentage:.1f}%)",
])
report.extend([
"",
"Correlation Analysis:",
f"• Entity Correlations Identified: {correlation_count}",
f"• Entity Correlations Identified: {len(scanner.graph.correlation_index)}",
f"• Cross-Reference Validation: {self._count_cross_validated_relationships(edges)} relationships verified by multiple sources",
""
])
@ -366,7 +375,9 @@ class ExportManager:
if len(connected_ips) > 5:
security_notes.append("Multiple IP endpoints")
# Average confidence
domain_edges = [e for e in edges if e['from'] == domain['id']]
avg_confidence = sum(e['confidence_score'] for e in domain_edges) / len(domain_edges) if domain_edges else 0
domain_analysis.append({
'domain': domain['id'],
@ -374,6 +385,7 @@ class ExportManager:
'ips': connected_ips,
'cert_status': cert_status,
'security_notes': security_notes,
'avg_confidence': avg_confidence
})
# Sort by number of connections (most connected first)
@ -468,7 +480,7 @@ class ExportManager:
def _identify_key_relationships(self, edges: List[Dict]) -> List[Dict[str, Any]]:
"""Identify the most significant relationships in the infrastructure."""
# Score relationships by type importance
# Score relationships by confidence and type importance
relationship_importance = {
'dns_a_record': 0.9,
'dns_aaaa_record': 0.9,
@ -479,19 +491,23 @@ class ExportManager:
'dns_ns_record': 0.7
}
edges = []
scored_edges = []
for edge in edges:
base_confidence = edge.get('confidence_score', 0)
type_weight = relationship_importance.get(edge.get('label', ''), 0.5)
combined_score = (base_confidence * 0.7) + (type_weight * 0.3)
edges.append({
scored_edges.append({
'source': edge['from'],
'target': edge['to'],
'type': edge.get('label', ''),
'confidence': base_confidence,
'provider': edge.get('source_provider', ''),
'score': combined_score
})
# Return top relationships by score
return sorted(edges, key=lambda x: x['score'], reverse=True)
return sorted(scored_edges, key=lambda x: x['score'], reverse=True)
def _analyze_certificate_infrastructure(self, nodes: List[Dict]) -> Dict[str, Any]:
"""Analyze certificate infrastructure across all domains."""
@ -554,6 +570,19 @@ class ExportManager:
else:
return "Mixed Status"
def _describe_confidence(self, confidence: float) -> str:
"""Convert confidence score to descriptive text."""
if confidence >= 0.9:
return "Very High"
elif confidence >= 0.8:
return "High"
elif confidence >= 0.6:
return "Medium"
elif confidence >= 0.4:
return "Low"
else:
return "Very Low"
def _humanize_relationship_type(self, rel_type: str) -> str:
"""Convert technical relationship types to human-readable descriptions."""
type_map = {
@ -570,6 +599,26 @@ class ExportManager:
}
return type_map.get(rel_type, rel_type.replace('_', ' ').title())
def _calculate_confidence_distribution(self, edges: List[Dict]) -> Dict[str, int]:
"""Calculate confidence score distribution."""
distribution = {'high': 0, 'medium': 0, 'low': 0}
for edge in edges:
confidence = edge.get('confidence_score', 0)
if confidence >= 0.8:
distribution['high'] += 1
elif confidence >= 0.6:
distribution['medium'] += 1
else:
distribution['low'] += 1
return distribution
def _get_confidence_threshold(self, level: str) -> str:
"""Get confidence threshold for a level."""
thresholds = {'high': '0.80', 'medium': '0.60', 'low': '0.00'}
return thresholds.get(level, '0.00')
def _count_cross_validated_relationships(self, edges: List[Dict]) -> int:
"""Count relationships verified by multiple providers."""
# Group edges by source-target pair